Skip to content

Commit

Permalink
Modified functions to use clarifai v10.0.1 (#24)
Browse files (browse the repository at this point in the history)
* Modified batch_size args for ingestion script

* Updated image_ingestion_clarifai_job_nb.ipynb

* Modified the export-inputs-to-DataFrame function

* Addressed clarifai v10.0.1 changes

* Modified functions to include text annotations
  • Loading branch information
mogith-pn authored Jan 18, 2024
1 parent b1cf0c3 commit 691aa30
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 11 deletions.
6 changes: 3 additions & 3 deletions Job_script/image_ingestion_clarifai_job_nb.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@
"from clarifaipyspark.client import ClarifaiPySpark\n",
"\n",
"\n",
"def ingest_image_dataframe(folder_path, user_id, app_id, dataset_id):\n",
"def ingest_image_into_app(folder_path, user_id, app_id, dataset_id):\n",
" \n",
" cspark_obj = ClarifaiPySpark(user_id=user_id, app_id=app_id, pat=dbutils.secrets.get(\"clarifai\",\"CLARIFAI_PAT\"))\n",
" dataset_obj = cspark_obj.dataset(dataset_id=dataset_id)\n",
" try:\n",
" dataset_obj.upload_dataset_from_folder(folder_path,input_type='image',labels= False)\n",
" dataset_obj.upload_dataset_from_folder(folder_path,input_type='image',labels= False, batch_size = 15)\n",
" \n",
" except Exception as e:\n",
" raise RuntimeError(f\"Error uploading images: {e}\")\n",
Expand All @@ -42,7 +42,7 @@
"app_id=dbutils.widgets.get(\"app_id\")\n",
"user_id=dbutils.widgets.get(\"user_id\")\n",
"dataset_id=dbutils.widgets.get(\"dataset_id\")\n",
"image_df =ingest_image_dataframe(file_path,user_id,app_id,dataset_id)\n"
"image_df =ingest_image_into_app(file_path,user_id,app_id,dataset_id)\n"
]
}
],
Expand Down
11 changes: 4 additions & 7 deletions clarifaipyspark/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ def export_annotations_to_dataframe(self, input_ids: list = None, input_type: st
response = list(self.list_annotations(input_ids=input_ids, input_type=input_type))
for an in response:
temp = {}
temp['annotation'] = str(an.data)
temp['annotation'] = str(an.data) if not an.data.metadata else {}
if not temp['annotation'] or temp['annotation'] == '{}':
continue
temp['annotation_id'] = an.id
Expand Down Expand Up @@ -415,11 +415,8 @@ def export_inputs_to_dataframe(self, input_type):
"""
if input_type not in ('image', 'text'):
raise UserError('Invalid input type, it should be image or text')

search_obj = Search(user_id=self.user_id, app_id=self.app_id, pat=self.pat)
search_response = search_obj.query(filters=[{"input_types":["image"]},{"input_dataset_ids":[self.dataset_id]}])
inputs=[hit.input for response in search_response for hit in response.hits]


inputs= list(self.list_inputs(input_type=input_type))
input_list=[]
for inp in inputs:
temp = {}
Expand Down Expand Up @@ -492,7 +489,7 @@ def export_annotations_to_volume(self, volumepath: str):
images_to_download=[]
for an in response:
temp = {}
temp['annotation'] = str(an.data)
temp['annotation'] = str(an.data) if not an.data.metadata else {}
if not temp['annotation'] or temp['annotation'] == '{}':
continue
temp['annotation_id'] = an.id
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
clarifai==9.10.3
clarifai==10.0.1
pyspark==3.5.0

0 comments on commit 691aa30

Please sign in to comment.