Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/whitead/sanclone
Browse files Browse the repository at this point in the history
  • Loading branch information
albertqu committed Oct 6, 2023
2 parents 5d535e3 + a453236 commit f200c0b
Show file tree
Hide file tree
Showing 10 changed files with 180 additions and 5 deletions.
2 changes: 2 additions & 0 deletions .ruff.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Allow lines to be as long as 120 characters.
line-length = 120
3 changes: 3 additions & 0 deletions sanclone/agent/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Explicit relative import: a bare ``from agent import ...`` is resolved as a
# top-level module named ``agent`` on Python 3, not as the sibling module
# inside this package, and fails when the package is imported.
from .agent import SanCloneAgent

__all__ = ["SanCloneAgent"]
41 changes: 41 additions & 0 deletions sanclone/agent/agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from langchain.agents import AgentExecutor, ZeroShotAgent
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
from langchain.chat_models import ChatOpenAI

from ..tools import make_tools
from .prompt import prompt_template


class AgentType:
    """Lookup table from an agent-type name to the LangChain agent class."""

    # Keys are the names accepted by ``get_agent``.
    valid_models = dict(
        ReactAgent=ZeroShotAgent,
        OpenAIFunctionsAgent=OpenAIFunctionsAgent,
    )

    @classmethod
    def get_agent(cls, model_name: str = "ReactAgent"):
        """Return the agent class registered under *model_name*.

        Raises:
            KeyError: if *model_name* is not a key of ``valid_models``.
        """
        agent_cls = cls.valid_models[model_name]
        return agent_cls


class SanCloneAgent:
    """Wrap a LangChain ``AgentExecutor`` configured with the San Clone tools."""

    def __init__(
        self,
        tools=None,
        llm=None,
        openai_api_key=None,
        temp=0.1,
        agent_type: str = "OpenAIFunctionsAgent",
        verbose=True,
    ):
        """Build the agent executor.

        Args:
            tools: Tools to give the agent. When ``None`` they are built with
                ``make_tools(llm)``.
            llm: Chat model to drive the agent. When ``None`` a ``gpt-4``
                ``ChatOpenAI`` instance is created.
            openai_api_key: Optional key forwarded to ``ChatOpenAI`` when the
                model is created here; ignored if *llm* is supplied.
            temp: Sampling temperature for the model created here; ignored if
                *llm* is supplied.
            agent_type: Key into ``AgentType.valid_models``.
            verbose: Forwarded to the ``AgentExecutor``.
        """
        # Previously every argument except ``agent_type`` was ignored and the
        # model was always rebuilt; honor what the caller passed in.
        if llm is None:
            llm_kwargs = {"temperature": temp, "model": "gpt-4", "client": None}
            if openai_api_key is not None:
                llm_kwargs["openai_api_key"] = openai_api_key
            llm = ChatOpenAI(**llm_kwargs)

        if tools is None:
            tools = make_tools(llm)

        self.agent_instance = AgentExecutor.from_agent_and_tools(
            tools=tools,
            agent=AgentType.get_agent(agent_type).from_llm_and_tools(llm, tools),
            return_intermediate_steps=True,
            handle_parsing_errors=True,
            verbose=verbose,
        )

    def run(self, prompt: str):
        """Wrap *prompt* in ``prompt_template``, run the agent, return its answer.

        The executor is created with ``return_intermediate_steps=True``, so it
        exposes more than one output key. ``Chain.run`` only supports chains
        with exactly one output key, so the executor is called directly and
        the ``"output"`` entry is returned.
        """
        result = self.agent_instance({"input": prompt_template.format(input=prompt)})
        return result["output"]
8 changes: 8 additions & 0 deletions sanclone/agent/prompt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# flake8: noqa
# Prompt wrapper for a user request. The ``{input}`` placeholder is filled in
# with ``str.format`` (``SanCloneAgent.run`` calls
# ``prompt_template.format(input=prompt)``).
prompt_template = """
You are an expert in molecular cloning.
You have a set of tools at your disposal.
Your task is to respond to the question or
solve the problem to the best of your ability using the provided tools.
Here is the question: {input}
"""
37 changes: 37 additions & 0 deletions sanclone/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
import click

# Banner printed once by ``main()`` before the interactive input loop starts.
WELCOME = """
Welcome to San Clone 👋 a molecular cloning agent 🧬.
Give it an instruction like "Clone NADH Oxidase from Streptococcus pyogenes into pET16b"
and press ✨ enter ✨
"""


@click.command()
def main():
    """Run the interactive San Clone prompt.

    Checks that an OpenAI client can be constructed (printing setup help and
    returning if the failure mentions ``OPENAI_API_KEY``), prints the welcome
    banner, then reads instructions until the user types ``exit``, ``quit`` or
    ``q`` or closes the input stream.
    """
    # check openai key
    try:
        from langchain.llms import OpenAI

        OpenAI(model="babbage-002")
    except Exception as e:
        if "OPENAI_API_KEY" in str(e):
            print("You need to set your OPENAI_API_KEY environment variable.")
            print("You can get one from https://beta.openai.com/")
            print("Then run the following command:")
            print("export OPENAI_API_KEY=<your key>")
            print("You can add this to your ~/.bashrc or ~/.bash_profile")
            return
        # Any other start-up failure used to be swallowed silently; keep
        # going (best effort) but tell the user what went wrong.
        click.echo(f"Warning: could not verify the OpenAI setup: {e}", err=True)
    print(WELCOME)
    while True:
        try:
            instruction = input(">")
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C at the prompt is treated like an explicit exit.
            print()
            print("Goodbye 👋")
            break
        if instruction.strip().lower() in {"exit", "quit", "q"}:
            print("Goodbye 👋")
            break
        # NOTE: any other instruction is currently read and discarded; nothing
        # is dispatched from this loop yet.


if __name__ == "__main__":
    main()
52 changes: 49 additions & 3 deletions sanclone/state.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,51 @@
from pydantic import BaseModel
# -*- coding: utf-8 -*-
"""get_seq_annotation.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1dJEOj6Jw3qOHsxcsP-W3Uj7mbKhvWnHi
"""

from Bio import Entrez
from Bio import SeqIO


class State:
    """Container for the sequences shared across a cloning workflow.

    Holds three slots: ``vector``, ``linear_insert`` and ``clone_seq``. Each
    has a ``store_*`` setter and a ``retrieve_*`` getter. ``vector`` and
    ``linear_insert`` must be Biopython ``SeqRecord`` objects; ``clone_seq``
    is stored without validation.
    """

    def __init__(self, vector=None, linear_insert=None, clone_seq=None):
        """Create a state, optionally pre-populated.

        Args:
            vector: Optional initial vector (validated like ``store_vector``).
            linear_insert: Optional initial insert (validated like
                ``store_linear_insert``).
            clone_seq: Optional initial clone sequence (not validated).

        Raises:
            ValueError: if *vector* or *linear_insert* is given and is not a
                ``SeqRecord``.
        """
        self.vector = None
        self.linear_insert = None
        self.clone_seq = None
        # Route initial values through the setters so they are validated the
        # same way as later updates.
        if vector is not None:
            self.store_vector(vector)
        if linear_insert is not None:
            self.store_linear_insert(linear_insert)
        if clone_seq is not None:
            self.store_clone_seq(clone_seq)

    @staticmethod
    def _require_seq_record(value, name):
        """Raise ``ValueError`` unless *value* is a Biopython ``SeqRecord``."""
        if not isinstance(value, SeqIO.SeqRecord):
            raise ValueError(f"Input '{name}' must be a SeqRecord object from Biopython's SeqIO.")

    def store_vector(self, vector):
        """Store *vector*; raises ``ValueError`` if it is not a ``SeqRecord``."""
        self._require_seq_record(vector, "vector")
        self.vector = vector

    def store_linear_insert(self, linear_insert):
        """Store *linear_insert*; raises ``ValueError`` if it is not a ``SeqRecord``."""
        self._require_seq_record(linear_insert, "linear_insert")
        self.linear_insert = linear_insert

    def store_clone_seq(self, clone_seq):
        """Store *clone_seq* as-is (no type check is applied to this slot)."""
        self.clone_seq = clone_seq

    def retrieve_vector(self):
        """Return the stored vector, or ``None`` if none has been stored."""
        return self.vector

    def retrieve_linear_insert(self):
        """Return the stored linear insert, or ``None`` if none has been stored."""
        return self.linear_insert

    def retrieve_clone_seq(self):
        """Return the stored clone sequence, or ``None`` if none has been stored."""
        return self.clone_seq
# seq to annotation


def download_genbank_file(accession, filename):
    """Fetch a GenBank text record via ``Entrez.efetch`` and write it to *filename*.

    Args:
        accession: Identifier passed as ``id`` to ``Entrez.efetch`` against the
            ``nucleotide`` database.
        filename: Path of the file to (over)write with the fetched text.
    """
    # Always provide your email address when using NCBI's services.
    # NOTE(review): the address literal below looks like a placeholder - confirm.
    Entrez.email = "[email protected]"
    fetch_args = {"db": "nucleotide", "id": accession, "rettype": "gb", "retmode": "text"}
    with Entrez.efetch(**fetch_args) as response, open(filename, 'w') as destination:
        destination.write(response.read())

# Empty pydantic model with no fields of its own.
# NOTE(review): a plain ``State`` class is also defined earlier in this listing;
# if both definitions live in the same module this later one rebinds the name.
# Presumably this is the removed side of the diff - confirm.
class State(BaseModel):
    pass
3 changes: 2 additions & 1 deletion sanclone/tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .echo import EchoTool
from .maketools import make_tools

# Public names re-exported by this package. The earlier single-entry
# ``__all__`` assignment was immediately overwritten, so only this one is kept.
__all__ = ["EchoTool", "make_tools"]
11 changes: 11 additions & 0 deletions sanclone/tools/maketools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from langchain import agents
from langchain.llms.base import BaseLanguageModel


def make_tools(llm: BaseLanguageModel):
    """Return the list of tools handed to the agent.

    Starts from LangChain's ``"human"`` tool (loaded with *llm*) and extends it
    with the project tools listed in ``extra_tools``, which is currently empty.
    """
    # append tools here
    extra_tools = []

    # add human input tool
    toolkit = agents.load_tools(["human"], llm)
    toolkit.extend(extra_tools)
    return toolkit
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
url="https://github.com/whitead/sanclone",
license="MIT",
packages=["sanclone", "sanclone.tools"],
install_requires=["langchain", "biopython"],
install_requires=["langchain", "biopython", "click"],
entry_points={"console_scripts": ["sanclone = sanclone.main:main"]},
test_suite="tests",
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down
25 changes: 25 additions & 0 deletions tests/test_sanity.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,28 @@ def test_echo_tool():

tool = EchoTool(shared_state=State())
assert tool.run("Hello") == "Hello"

# def test_state_tool():
# from sanclone import State
# from sanclone.State import download_genbank_file
# accession_id_vector = "NC_005213"
# output_filename_vector = "NC_005213.gbk"
# accession_id_linear_insert = "NC_000932"
# output_filename_linear_insert = "NC_000932.gbk"
# download_genbank_file(accession_id_vector, output_filename_vector)
# download_genbank_file(accession_id_linear_insert, output_filename_linear_insert)

# for gb_record in SeqIO.parse(open(output_filename_linear_insert,"r"), "genbank") :
# # now do something with the record
# print ("Name %s, %i features" % (gb_record.name, len(gb_record.features)))

# vector_seq = list(SeqIO.parse(open(output_filename_vector,"r"), "genbank"))
# insert_seq = list(SeqIO.parse(open(output_filename_linear_insert,"r"), "genbank"))

# seq_anno = State(vector_seq[0])
# seq_anno.store_linear_insert(insert_seq[0])

# retrieved_vector = seq_anno.retrieve_vector()
# retrieved_insert = seq_anno.retrieve_linear_insert()
# print(retrieved_vector)
# print(retrieved_insert)

0 comments on commit f200c0b

Please sign in to comment.