-
Notifications
You must be signed in to change notification settings - Fork 1
/
dataset.py
51 lines (40 loc) · 1.32 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
'''
Dataset of images paired with true and counterfactual captions,
preprocessed and tokenized with CLIP.
'''
import clip
from PIL import Image
from torch.utils.data import Dataset
class imageTitleDataset(Dataset):
    '''
    Dataset of images paired with a true and a counterfactual caption.

    Captions are tokenized once, at construction time, with CLIP's
    tokenizer. Images are opened and preprocessed one at a time in
    ``__getitem__`` using the preprocessing transform returned by
    ``clip.load``.
    '''

    def __init__(self,
                 list_image_path,
                 list_txt,
                 list_txt_cf,
                 clip_version="ViT-B/32"):
        '''
        Arguments
        ---------
        list_image_path
            A list of image paths
        list_txt
            A list of true captions
        list_txt_cf
            A list of counterfactual captions
        clip_version
            Name of the CLIP model whose preprocessing transform is used

        Tokenizes texts using CLIP's tokenizer and keeps CLIP's
        preprocessing function for later use on the images.
        The three lists are indexed with the same position in
        ``__getitem__``, so they are expected to be aligned.
        '''
        # Keep the image paths and the raw (untokenized) true captions
        self.image_path = list_image_path
        self.text = list_txt

        # Tokenize text using CLIP's tokenizer
        self.caption = clip.tokenize(list_txt)
        self.cf_caption = clip.tokenize(list_txt_cf)

        # Only the preprocessing transform is kept; the model itself is
        # discarded. Load it on the CPU so that building a dataset does
        # not place model weights on the GPU just to throw them away.
        _, self.preprocess = clip.load(clip_version, device="cpu")

    def __len__(self):
        '''Number of samples, taken from the tokenized true captions.'''
        return len(self.caption)

    def __getitem__(self, idx):
        '''
        Return ``(image, caption, cf_caption, text)`` for position ``idx``:
        the preprocessed image, the tokenized true caption, the tokenized
        counterfactual caption and the raw true caption.
        '''
        # Image.open is lazy and keeps the file open; the context manager
        # closes it as soon as preprocessing has produced its result, so
        # file handles do not pile up across many samples.
        with Image.open(self.image_path[idx]) as img:
            image = self.preprocess(img)

        text = self.text[idx]
        caption = self.caption[idx]
        cf_caption = self.cf_caption[idx]
        return image, caption, cf_caption, text