Skip to content

Commit

Permalink
Refactor Document container
Browse files Browse the repository at this point in the history
  • Loading branch information
jenniferjiangkells committed Nov 5, 2024
1 parent 557a4dc commit da84e60
Show file tree
Hide file tree
Showing 15 changed files with 256 additions and 244 deletions.
8 changes: 4 additions & 4 deletions docs/reference/pipeline/data_container.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,12 @@ from healthchain.io.containers import Document
doc = Document("OpenAI released GPT-4 in 2023.")

# Basic text operations
print(f"Char count: {doc.char_count()}")
print(f"Word count: {doc.word_count()}")
print(f"Char count: {doc.nlp.char_count()}")
print(f"Word count: {doc.nlp.word_count()}")

# Access tokens and entities (requires spaCy preprocessing)
print(f"Tokens: {doc.tokens}")
print(f"Entities: {doc.get_entities()}")
print(f"Tokens: {doc.nlp.get_tokens()}")
print(f"Entities: {doc.nlp.get_entities()}")

# Iterate over tokens
for token in doc:
Expand Down
12 changes: 6 additions & 6 deletions docs/reference/pipeline/integrations.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ doc = Document("This is a test sentence.")
processed_doc = pipeline(doc)

# Access spaCy annotations
spacy_doc = processed_doc.get_spacy_doc()
spacy_doc = processed_doc.nlp.get_spacy_doc()
for token in spacy_doc:
print(f"Token: {token.text}, POS: {token.pos_}, Lemma: {token.lemma_}")
```
Expand Down Expand Up @@ -107,7 +107,7 @@ doc = Document("I love using HealthChain for my NLP projects!")
processed_doc = pipeline(doc)

# Access Hugging Face output
sentiment_result = processed_doc.get_huggingface_output("sentiment-analysis")
sentiment_result = processed_doc.models.get_output("huggingface", "sentiment-analysis")
print(f"Sentiment: {sentiment_result}")
```

Expand Down Expand Up @@ -165,7 +165,7 @@ doc = Document("HealthChain is a powerful package for building NLP pipelines. It
processed_doc = pipeline(doc)

# What summary did we get?
summary = processed_doc.get_langchain_output("chain_output")
summary = processed_doc.models.get_output("langchain", "chain_output")
print(f"Summary: {summary}")
```

Expand Down Expand Up @@ -204,9 +204,9 @@ doc = Document("HealthChain makes it easy to build powerful NLP pipelines!")
processed_doc = pipeline(doc)

# Let's see what we got!
spacy_doc = processed_doc.get_spacy_doc()
sentiment = processed_doc.get_huggingface_output("sentiment-analysis")
summary = processed_doc.get_langchain_output("chain_output")
spacy_doc = processed_doc.nlp.get_spacy_doc()
sentiment = processed_doc.models.get_output("huggingface", "sentiment-analysis")
summary = processed_doc.models.get_output("langchain", "chain_output")

print(f"Tokens: {[token.text for token in spacy_doc]}")
print(f"Sentiment: {sentiment}")
Expand Down
8 changes: 4 additions & 4 deletions healthchain/io/cdaconnector.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import logging

from healthchain.io.containers import Document
from healthchain.io.containers.document import StructuredData
from healthchain.io.base import BaseConnector
from healthchain.cda_parser import CdaAnnotator
from healthchain.models.data import CcdData, ConceptLists
Expand Down Expand Up @@ -70,9 +69,10 @@ def input(self, in_data: CdaRequest) -> Document:
note=note_text,
)

return Document(
data=ccd_data.note, structured_docs=StructuredData(ccd_data=ccd_data)
)
doc = Document(data=ccd_data.note)
doc.hl7.set_ccd_data(ccd_data)

return doc

def output(self, out_data: Document) -> CdaResponse:
"""
Expand Down
15 changes: 8 additions & 7 deletions healthchain/io/cdsfhirconnector.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from healthchain.io.containers import Document
from healthchain.io.base import BaseConnector
from healthchain.io.containers.document import StructuredData
from healthchain.models.data.cdsfhirdata import CdsFhirData
from healthchain.models.requests.cdsrequest import CDSRequest
from healthchain.models.responses.cdsresponse import CDSResponse
Expand Down Expand Up @@ -67,10 +66,10 @@ def input(self, in_data: CDSRequest) -> Document:
except Exception as e:
raise ValueError("Invalid prefetch data provided: {e}!") from e

return Document(
data=str(cds_fhir_data.model_dump_prefetch()),
structured_docs=StructuredData(fhir_data=cds_fhir_data),
)
doc = Document(data=str(cds_fhir_data.model_dump_prefetch()))
doc.hl7.set_fhir_data(cds_fhir_data)

return doc

def output(self, out_data: Document) -> CDSResponse:
"""
Expand All @@ -92,8 +91,10 @@ def output(self, out_data: Document) -> CDSResponse:
- If out_data.cds_cards is None, a warning is logged and an empty list of cards is returned.
- System actions (out_data.cds_actions) are included in the response if present.
"""
if out_data.cds.cards is None:
if out_data._cds._cards is None:
log.warning("No CDS cards found in Document, returning empty list of cards")
return CDSResponse(cards=[])

return CDSResponse(cards=out_data.cds.cards, systemActions=out_data.cds.actions)
return CDSResponse(
cards=out_data.cds.get_cards(), systemActions=out_data.cds.get_actions()
)
Loading

0 comments on commit da84e60

Please sign in to comment.