Refactor Document container

dotimplement · Nov 5, 2024 · da84e60
1 parent 557a4dc
commit da84e60
Show file tree

Hide file tree

Showing 15 changed files with 256 additions and 244 deletions.
diff --git a/docs/reference/pipeline/data_container.md b/docs/reference/pipeline/data_container.md
@@ -31,12 +31,12 @@ from healthchain.io.containers import Document
 doc = Document("OpenAI released GPT-4 in 2023.")
 
 # Basic text operations
-print(f"Char count: {doc.char_count()}")
-print(f"Word count: {doc.word_count()}")
+print(f"Char count: {doc.nlp.char_count()}")
+print(f"Word count: {doc.nlp.word_count()}")
 
 # Access tokens and entities (requires spaCy preprocessing)
-print(f"Tokens: {doc.tokens}")
-print(f"Entities: {doc.get_entities()}")
+print(f"Tokens: {doc.nlp.get_tokens()}")
+print(f"Entities: {doc.nlp.get_entities()}")
 
 # Iterate over tokens
 for token in doc:

diff --git a/docs/reference/pipeline/integrations.md b/docs/reference/pipeline/integrations.md
@@ -72,7 +72,7 @@ doc = Document("This is a test sentence.")
 processed_doc = pipeline(doc)
 
 # Access spaCy annotations
-spacy_doc = processed_doc.get_spacy_doc()
+spacy_doc = processed_doc.nlp.get_spacy_doc()
 for token in spacy_doc:
     print(f"Token: {token.text}, POS: {token.pos_}, Lemma: {token.lemma_}")
 ```
@@ -107,7 +107,7 @@ doc = Document("I love using HealthChain for my NLP projects!")
 processed_doc = pipeline(doc)
 
 # Access Hugging Face output
-sentiment_result = processed_doc.get_huggingface_output("sentiment-analysis")
+sentiment_result = processed_doc.models.get_output("huggingface", "sentiment-analysis")
 print(f"Sentiment: {sentiment_result}")
 ```
 
@@ -165,7 +165,7 @@ doc = Document("HealthChain is a powerful package for building NLP pipelines. It
 processed_doc = pipeline(doc)
 
 # What summary did we get?
-summary = processed_doc.get_langchain_output("chain_output")
+summary = processed_doc.models.get_output("langchain", "chain_output")
 print(f"Summary: {summary}")
 ```
 
@@ -204,9 +204,9 @@ doc = Document("HealthChain makes it easy to build powerful NLP pipelines!")
 processed_doc = pipeline(doc)
 
 # Let's see what we got!
-spacy_doc = processed_doc.get_spacy_doc()
-sentiment = processed_doc.get_huggingface_output("sentiment-analysis")
-summary = processed_doc.get_langchain_output("chain_output")
+spacy_doc = processed_doc.nlp.get_spacy_doc()
+sentiment = processed_doc.models.get_output("huggingface", "sentiment-analysis")
+summary = processed_doc.models.get_output("langchain", "chain_output")
 
 print(f"Tokens: {[token.text for token in spacy_doc]}")
 print(f"Sentiment: {sentiment}")

diff --git a/healthchain/io/cdaconnector.py b/healthchain/io/cdaconnector.py
@@ -1,7 +1,6 @@
 import logging
 
 from healthchain.io.containers import Document
-from healthchain.io.containers.document import StructuredData
 from healthchain.io.base import BaseConnector
 from healthchain.cda_parser import CdaAnnotator
 from healthchain.models.data import CcdData, ConceptLists
@@ -70,9 +69,10 @@ def input(self, in_data: CdaRequest) -> Document:
             note=note_text,
         )
 
-        return Document(
-            data=ccd_data.note, structured_docs=StructuredData(ccd_data=ccd_data)
-        )
+        doc = Document(data=ccd_data.note)
+        doc.hl7.set_ccd_data(ccd_data)
+
+        return doc
 
     def output(self, out_data: Document) -> CdaResponse:
         """

diff --git a/healthchain/io/cdsfhirconnector.py b/healthchain/io/cdsfhirconnector.py
@@ -2,7 +2,6 @@
 
 from healthchain.io.containers import Document
 from healthchain.io.base import BaseConnector
-from healthchain.io.containers.document import StructuredData
 from healthchain.models.data.cdsfhirdata import CdsFhirData
 from healthchain.models.requests.cdsrequest import CDSRequest
 from healthchain.models.responses.cdsresponse import CDSResponse
@@ -67,10 +66,10 @@ def input(self, in_data: CDSRequest) -> Document:
         except Exception as e:
             raise ValueError("Invalid prefetch data provided: {e}!") from e
 
-        return Document(
-            data=str(cds_fhir_data.model_dump_prefetch()),
-            structured_docs=StructuredData(fhir_data=cds_fhir_data),
-        )
+        doc = Document(data=str(cds_fhir_data.model_dump_prefetch()))
+        doc.hl7.set_fhir_data(cds_fhir_data)
+
+        return doc
 
     def output(self, out_data: Document) -> CDSResponse:
         """
@@ -92,8 +91,10 @@ def output(self, out_data: Document) -> CDSResponse:
             - If out_data.cds_cards is None, a warning is logged and an empty list of cards is returned.
             - System actions (out_data.cds_actions) are included in the response if present.
         """
-        if out_data.cds.cards is None:
+        if out_data._cds._cards is None:
             log.warning("No CDS cards found in Document, returning empty list of cards")
             return CDSResponse(cards=[])
 
-        return CDSResponse(cards=out_data.cds.cards, systemActions=out_data.cds.actions)
+        return CDSResponse(
+            cards=out_data.cds.get_cards(), systemActions=out_data.cds.get_actions()
+        )