Source code for stringdb.api

"""api module. Functions to access the STRING API"""
import pandas as pd
import io
import requests


def build_request_url(method, output_format='tsv'):
    """Assemble the endpoint URL for a STRING API call

    The URL is the API root, the output format and the method name joined
    with slashes, which lets callers create stubs for any STRING method.

    Parameters
    ----------
    method: str
        options - get_string_ids, network, interaction_partners, homology,
        homology_best, enrichment, functional_annotation, ppi_enrichment,
        version
    output_format: str, optional
        options - tsv, tsv-no-header, json, xml

    Returns
    -------
    str
        request URL
    """
    api_root = "https://string-db.org/api"
    url_parts = (api_root, output_format, method)
    return "/".join(url_parts)
def handle_results(results):
    """Turn a response returned from string into a DataFrame

    Parameters
    ----------
    results: response object
        response from a request to the string api; its ``ok``, ``content``
        and ``reason`` attributes are read

    Returns
    -------
    DataFrame
        response body parsed as tab separated values

    Raises
    ------
    ValueError
        if the response is not ok; the message is the response's reason
    """
    if not results.ok:
        raise ValueError(results.reason)
    # the body is always parsed as tsv, so other output formats are not
    # supported by this helper
    body = results.content.decode('utf8')
    return pd.read_csv(io.StringIO(body), sep='\t')
def get_string_ids(identifiers, species=9606, limit=1, echo_query=1,
                   caller_identity='https://github.com/gpp-rnd/stringdb'):
    """Look up the string ids that correspond to gene symbols

    Parameters
    ----------
    identifiers: list
        gene symbols to map to string ids
    species: int, optional
        species NCBI identifier
    limit: int, optional
        limits the number of matches per query (best match comes first)
    echo_query: int, optional
        insert column with input identifiers. Takes values 0 or 1 (boolean)
    caller_identity: str, optional
        personal identifier for string

    Returns
    -------
    DataFrame
        mapping of string ids
    """
    endpoint = build_request_url("get_string_ids")
    # identifiers are sent as a single carriage-return separated string
    joined_ids = "\r".join(identifiers)
    payload = {
        "identifiers": joined_ids,
        "species": species,
        "limit": limit,
        "echo_query": echo_query,
        "caller_identity": caller_identity,
    }
    response = requests.post(endpoint, data=payload)
    return handle_results(response)
def get_functional_annotation(identifiers, species=9606,
                              caller_identity='https://github.com/gpp-rnd/stringdb',
                              allow_pubmed=0):
    """Get all pathways for a list of string ids

    The identifiers are sent in batches of at most 2000 per request and the
    per-batch results are concatenated in order.

    Parameters
    ----------
    identifiers: list
        list of string ids
    species: int, optional
        species NCBI identifier
    caller_identity: str, optional
        personal identifier for string
    allow_pubmed: int, optional
        include pubmed articles, options - 1 or 0

    Returns
    -------
    DataFrame
        mapping between string ids and gene pathways; an empty DataFrame
        when no identifiers are supplied

    Raises
    ------
    ValueError
        if any request to string fails
    """
    # limited to 2000 queries by string
    max_batch_size = 2000
    batches = [identifiers[start:(start + max_batch_size)]
               for start in range(0, len(identifiers), max_batch_size)]
    if not batches:
        # nothing to query; pd.concat would fail on an empty list
        return pd.DataFrame()
    request_url = build_request_url("functional_annotation")
    pathway_list = []
    for curr_ids in batches:
        params = {
            "identifiers": "\r".join(curr_ids),  # protein list
            "species": species,
            "caller_identity": caller_identity,
            "allow_pubmed": allow_pubmed,
        }
        results = requests.post(request_url, data=params)
        pathway_list.append(handle_results(results))
    return pd.concat(pathway_list)
def get_network(identifiers, species=9606, required_score=400,
                caller_identity='https://github.com/gpp-rnd/stringdb',
                add_nodes=0):
    """Fetch the ppi network for a list of string ids

    Parameters
    ----------
    identifiers: list
        list of string ids
    species: int, optional
        species NCBI identifier
    required_score: int, optional
        score cutoff for edges, corresponds to probability of belonging to
        same kegg pathway
    caller_identity: str, optional
        personal identifier for string
    add_nodes: int, optional
        number of nodes to add to the network based on confidence

    Returns
    -------
    DataFrame
        network edges
    """
    endpoint = build_request_url("network")
    # identifiers are sent as a single carriage-return separated string
    joined_ids = "\r".join(identifiers)
    payload = {
        "identifiers": joined_ids,
        "species": species,
        "caller_identity": caller_identity,
        "required_score": required_score,
        "add_nodes": add_nodes,
    }
    response = requests.post(endpoint, data=payload)
    return handle_results(response)
def get_ppi_enrichment(identifiers, species=9606, required_score=400,
                       caller_identity='https://github.com/gpp-rnd/stringdb'):
    """Request ppi enrichment statistics for a list of string ids

    Parameters
    ----------
    identifiers: list
        list of string ids
    species: int, optional
        species NCBI identifier
    required_score: int, optional
        score cutoff for edges, corresponds to probability of belonging to
        same kegg pathway
    caller_identity: str, optional
        personal identifier for string

    Returns
    -------
    DataFrame
        one row DataFrame with ppi enrichment stats
    """
    endpoint = build_request_url("ppi_enrichment")
    # identifiers are sent as a single carriage-return separated string
    joined_ids = "\r".join(identifiers)
    payload = {
        "identifiers": joined_ids,
        "species": species,
        "required_score": required_score,
        "caller_identity": caller_identity,
    }
    response = requests.post(endpoint, data=payload)
    return handle_results(response)
def get_interaction_partners(identifiers, species=9606, required_score=400,
                             limit=None,
                             caller_identity='https://github.com/gpp-rnd/stringdb'):
    """Fetch interactions between the given proteins and all other string proteins

    Parameters
    ----------
    identifiers: list
        list of string ids
    species: int, optional
        species NCBI identifier
    required_score: int, optional
        score cutoff for edges, corresponds to probability of belonging to
        same kegg pathway
    limit: int, optional
        limit the number of interactors returned for each protein, ranked by
        score. Left out of the request when None
    caller_identity: str, optional
        personal identifier for string

    Returns
    -------
    DataFrame
        parsed response from the interaction_partners method
    """
    endpoint = build_request_url("interaction_partners")
    # identifiers are sent as a single carriage-return separated string
    joined_ids = "\r".join(identifiers)
    payload = {
        "identifiers": joined_ids,
        "species": species,
        "required_score": required_score,
        "caller_identity": caller_identity,
    }
    if limit is not None:
        payload.update({'limit': limit})
    response = requests.post(endpoint, data=payload)
    return handle_results(response)
def get_enrichment(identifiers, background_string_identifiers=None,
                   species=9606,
                   caller_identity='https://github.com/gpp-rnd/stringdb'):
    """Request functional enrichment for a list of proteins

    Parameters
    ----------
    identifiers: list
        list of string ids
    background_string_identifiers: list, optional
        list of string ids to use as background. Left out of the request
        when None
    species: int, optional
        species NCBI identifier
    caller_identity: str, optional
        personal identifier for string

    Returns
    -------
    DataFrame
        enriched pathways
    """
    endpoint = build_request_url("enrichment")
    # identifiers are sent as a single carriage-return separated string
    joined_ids = "\r".join(identifiers)
    payload = {
        "identifiers": joined_ids,
        "species": species,
        "caller_identity": caller_identity,
    }
    if background_string_identifiers is not None:
        joined_background = "\r".join(background_string_identifiers)
        payload['background_string_identifiers'] = joined_background
    response = requests.post(endpoint, data=payload)
    return handle_results(response)