Info

BioModelsRAG is a tool that lets researchers analyze BioModels. BioModelsRAG requires the following packages, as well as a local installation of Ollama, which can be downloaded from the Ollama website. The packages can be installed with pip install in your IDE terminal.

Required Packages:

Modules

BioModelCacheRetrieval

The BioModelCacheRetrieval class queries the BioModels cache using keywords and downloads the relevant BioModels.

import os
import tempfile

import requests

class BioModelCacheRetrieval:
    def __init__(self, search_str, github_owner="TheBobBob", github_repo_cache="BiomodelsCache"):
        self.search_str = search_str
        self.github_owner = github_owner
        self.github_repo_cache = github_repo_cache

    def search_models(self):
        BIOMODELS_JSON_DB_PATH = "src/cached_biomodels.json"
        cached_data = None
        url = f"https://api.github.com/repos/{self.github_owner}/{self.github_repo_cache}/contents/{BIOMODELS_JSON_DB_PATH}"
        headers = {"Accept": "application/vnd.github+json"}
        response = requests.get(url, headers=headers)
        if response.status_code == 200:
            data = response.json()
            if "download_url" in data:
                file_url = data["download_url"]
                json_response = requests.get(file_url)
                cached_data = json_response.json()
        else:
            print(f"Failed to retrieve data from GitHub. Status code: {response.status_code}")
            return {}

        # Match the query against every field of each cached model entry
        query_text = self.search_str.strip().lower()
        models = {}
        if cached_data:
            for model_id, model_data in cached_data.items():
                if 'name' in model_data:
                    name = model_data['name'].lower()
                    url = model_data['url']
                    id = model_data['model_id']
                    title = model_data['title']
                    authors = model_data['authors']
                    model_info = ' '.join([str(v).lower() for v in model_data.values()])
                    if query_text in model_info and model_id not in models:
                        models[model_id] = {
                            'ID': model_id,
                            'name': name,
                            'url': url,
                            'id': id,
                            'title': title,
                            'authors': authors,
                        }
        return models

    @staticmethod
    def download_model_files(model_url, model_id):
        LOCAL_DOWNLOAD_DIR = tempfile.mkdtemp()
        # The URL is rebuilt from the model ID so it points at the raw cached file
        model_url = f"https://raw.githubusercontent.com/TheBobBob/BiomodelsStore/main/biomodels/{model_id}/{model_id}_url.xml"
        try:
            response = requests.get(model_url)
            if response.status_code == 200:
                os.makedirs(LOCAL_DOWNLOAD_DIR, exist_ok=True)
                file_path = os.path.join(LOCAL_DOWNLOAD_DIR, f"{model_id}.xml")
                with open(file_path, 'wb') as file:
                    file.write(response.content)
                print(f"Model {model_id} downloaded successfully: {file_path}")
                return file_path
            else:
                print(f"Failed to download the model from {model_url}. Status code: {response.status_code}")
                return None
        except Exception as e:
            print(f"Error downloading model {model_id} from {model_url}: {e}")
            return None
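
For example, the class can be used on its own as below (the search string "glucose" is purely illustrative; the module name biomodelcache follows the Main example at the end of this page):

from biomodelcache import BioModelCacheRetrieval

retriever = BioModelCacheRetrieval("glucose")
models = retriever.search_models()
for model_id, model_data in models.items():
    # Download each matching model; returns the local file path, or None on failure
    model_file_path = retriever.download_model_files(model_data['url'], model_id)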

convert_sbml_to_antimony

The convert_sbml_to_antimony function converts the downloaded SBML models into the Antimony format for the text splitter.

from convert_sbml_to_antimony import convert_sbml_to_antimony

if model_file_path:
    # Step 3: Convert the downloaded SBML model to Antimony format
    antimony_file_path = os.path.join(LOCAL_DOWNLOAD_DIR, f"{model_id}.txt")
    convert_sbml_to_antimony(model_file_path, antimony_file_path)
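
The function itself is provided by the repository. If you need a stand-in, a minimal sketch using the antimony package could look like the following (this is an assumption and may differ from the project's actual implementation):

import antimony

def convert_sbml_to_antimony(sbml_file_path, antimony_file_path):
    # loadSBMLFile returns a negative value if the file cannot be parsed
    if antimony.loadSBMLFile(sbml_file_path) < 0:
        raise RuntimeError(f"Could not load SBML: {antimony.getLastError()}")
    # Write the Antimony translation of the main module to the output file
    with open(antimony_file_path, "w") as f:
        f.write(antimony.getAntimonyString(None))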

split_biomodels

The split_biomodels function is the text splitter: it splits the downloaded Antimony-format models into chunks for the vector database.

from split_biomodels import split_biomodels

final_items = split_biomodels(antimony_file_path)
all_final_items = []
all_final_items.extend(final_items)
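
A minimal sketch of how split_biomodels might be implemented, assuming LangChain's RecursiveCharacterTextSplitter (the splitter choice and the chunk parameters are assumptions, not necessarily what the project uses):

from langchain_text_splitters import RecursiveCharacterTextSplitter

def split_biomodels(antimony_file_path, chunk_size=1000, chunk_overlap=20):
    # Read the Antimony text and split it into overlapping chunks
    with open(antimony_file_path) as f:
        text = f.read()
    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    return splitter.split_text(text)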

create_vector_db

The create_vector_db function creates the vector database that the split biomodels are passed into. Before they are inserted, each segment of a biomodel is summarized using an LLM so that it is easier to query via cosine similarity.

from create_vector_db import create_vector_db

if all_final_items:
    db = create_vector_db(all_final_items)
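
A minimal sketch of what create_vector_db might look like, assuming ChromaDB as the vector store and the ollama Python client for summarization (the collection name, model name, and prompt wording are illustrative, and a local Ollama server must be running):

import chromadb
import ollama

def create_vector_db(final_items, model_name="llama3"):
    client = chromadb.Client()
    # "hnsw:space": "cosine" makes the collection use cosine similarity
    collection = client.create_collection(
        name="biomodels", metadata={"hnsw:space": "cosine"}
    )
    for i, item in enumerate(final_items):
        # Summarize each segment with the LLM before inserting it
        prompt = f"Summarize this segment of a biological model:\n\n{item}"
        summary = ollama.generate(model=model_name, prompt=prompt)["response"]
        collection.add(documents=[summary], ids=[f"segment_{i}"])
    return collection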

generate_response

The generate_response function queries the vector database and generates a response to a user-provided query.

from generate_response import generate_response

query_text = input("Enter your question about the model(s): ")
response = generate_response(db, query_text)
print(f"Response: {response}")
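
A minimal sketch of generate_response, assuming the ChromaDB collection from the sketch above and the ollama client (the prompt wording, model name, and number of retrieved results are illustrative):

import ollama

def generate_response(db, query_text, model_name="llama3", n_results=5):
    # Retrieve the segments most similar to the query (cosine similarity)
    results = db.query(query_texts=[query_text], n_results=n_results)
    context = "\n\n".join(results["documents"][0])
    # Ask the LLM to answer using only the retrieved context
    prompt = (
        "Answer the question using the context below.\n\n"
        f"Context:\n{context}\n\nQuestion: {query_text}"
    )
    return ollama.generate(model=model_name, prompt=prompt)["response"]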

Main

Below is an example of how to combine the functions into the full pipeline. The pipeline below does the following:

  1. Gets user input for a search query.
  2. Searches models based on the query.
  3. Downloads and processes each model.
  4. Converts models to Antimony format and splits them.
  5. Creates a vector database from the split biomodels.
  6. Queries the database and generates a response based on user input.

The pipeline can also be broken into parts so that each function is used separately.

from biomodelcache import BioModelCacheRetrieval
from convert_sbml_to_antimony import convert_sbml_to_antimony
from split_biomodels import split_biomodels
from create_vector_db import create_vector_db
from generate_response import generate_response
import os
import tempfile
LOCAL_DOWNLOAD_DIR = tempfile.mkdtemp()
def main(search_str):
    retriever = BioModelCacheRetrieval(search_str)
    models = retriever.search_models()
    if models:
        all_final_items = []
        for model_id, model_data in models.items():
            print(f"Processing model: {model_data['name']}")
            model_url = model_data['url']
            model_file_path = retriever.download_model_files(model_url, model_id)
            if model_file_path:
                # Convert the downloaded SBML model to Antimony and split it
                antimony_file_path = os.path.join(LOCAL_DOWNLOAD_DIR, f"{model_id}.txt")
                convert_sbml_to_antimony(model_file_path, antimony_file_path)
                final_items = split_biomodels(antimony_file_path)
                all_final_items.extend(final_items)
        if all_final_items:
            db = create_vector_db(all_final_items)
            query_text = input("Enter your question about the model(s): ")
            response = generate_response(db, query_text)
            print(f"Response: {response}")
        else:
            raise ValueError("No models were processed successfully.")
    else:
        raise ValueError("No models found matching your search query.")

if __name__ == "__main__":
    search_str = input("Enter search query: ")
    main(search_str)