-
Notifications
You must be signed in to change notification settings - Fork 0
/
configuration_README.txt
150 lines (122 loc) · 5.75 KB
/
configuration_README.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
[COMMON]
datasetPathVideo = Path to video files. The videos can be in inner folders.
datasetPathDatabase = Path to create the database and read from it later
cuda= Whether to use CUDA. Must be 1
cpus= No. of CPU cores to use when multithreading or multiprocessing. A good no. to use is half the no. of CPUs in your machine.
resizeImageTo = Resize image to a certain width x height. This is also used as the output dims of the network. Use a lower number if OOM errors occur.
audio_embs_options = wav2vec or openl3 or pyannoteTitaNet
audio_embs = wav2vec and openl3 are deprecated. Must use pyannoteTitaNet
unet1_dim = 64
unet1_image_size = 16
insert_amd_env_vars = Input 0 to not insert AMD environment variables. Anything else to insert them. The environment variables are HSA_OVERRIDE_GFX_VERSION and ROCM_PATH. They are sometimes used to get CUDA working on AMD machines after installing it.
HSA_OVERRIDE_GFX_VERSION = Overrides the GFX version so that CUDA works on AMD hardware newer than what is officially supported
ROCM_PATH = ROCm Path
[dbCreateAndPopulate]
recreateDb= Insert 0 to insert into a previously created database, or insert anything else to recreate the database from scratch.
[extractAudio]
datasetPathAudio = Path to extract audio files to. The audio files are deleted afterwards, so this folder will end up empty.
dbChunk = Chunk of video files to get from database at a time.
time_to_wait_before_deleting_files = Time to wait before deleting files in seconds. If the program is stopped abruptly please delete any audio files created in the execution folder.
[extractFaces]
expandFaceVerticalRatio = Expand vertical cropped face image by a ratio of the cropped face image height. Do not change.
expandFaceHorizontalRatio= Expand horizontal cropped face image by a ratio of the cropped face image width. Do not change.
faceDetectionDeepFaceBackend= Which backend to use for face detection and cropping. 'opencv' or 'ssd' or 'dlib' or 'mtcnn' or 'retinaface' or 'mediapipe'. Do not change.
parallelism = Chunk of video files to get from database at a time.
parallelismFrames = No. of frames to extract per video. Note: Regardless of the no. of frames extracted, only one will be used in training, so it is better to choose 1.
datasetPathFrames = Path to export frames to. The frames are deleted afterwards, so this folder will end up empty.
datasetPathFaces = Path to export face images.
[fineTuneStableDiffusionTraining] DEPRECATED
db_chunk = 50000
dev_mode = 0
continue_from_epoch = 1
continue_from_offset = 0
continue_from_epoch_and_offset = 0
unconditional_guidance_scale = 40
[extractOpenL3] DEPRECATED
datasetPathAudio = /home/gamal/Datasets/Dataset1/Audio
dbChunk = 240
time_to_wait_before_deleting_files = 60
openl3_mode_options = stable or imagen
openl3_mode = imagen
[extractWavToVec] DEPRECATED
datasetPathAudio = /home/gamal/Datasets/Dataset1/Audio
dbChunk = 240
time_to_wait_before_deleting_files = 60
audio_length_wav2vec = 6
[extractPyannoteTitaNet]
datasetPathAudio = Path to extract audio files to. The audio files are deleted afterwards, so this folder will end up empty.
dbChunk = Chunk of video files to get from database at a time.
time_to_wait_before_deleting_files = Time to wait before deleting files in seconds. If the program is stopped abruptly please delete any audio files created in the execution folder.
[extractAudioFeatures]
datasetPathAudio = /home/gamal/Datasets/Dataset1/Audio
dbChunk = 240
time_to_wait_before_deleting_files = 60
[fineTuneStableDiffusionTesting] DEPRECATED
use_video_in_configuration = 1
video_path = /home/gamal/Datasets/Dataset1/Video/blhA_I4zjvE_15.600000_22.333333.mp4
time_to_wait_before_deleting_files = 180
dev_mode = 0
[train_imagen]
model_filename = Model filename. The actual filename will have _(audio_length_used)s added to it. train_imagen_all file will use this file or create one if it is not created.
audio_length_used = 6 or 12 or 24
epochs = No. of epochs to train
sample_every_offset = 2 deprecated
save_every_offset = 2 deprecated
sub_epochs = No. of training steps for a single batch
sample_every = Get a test image using one of the inputs every this no. of training steps.
save_model_every = Save the model every this no. of training steps.
batch_size = Batch size. Note that gradient accumulation is used and effective batch size is batch_size x 8. Decrease if you run into OOM errors.
continue_from_epoch = If continue_from_epoch_and_offset_flag is any number but zero, continue from this epoch.
continue_from_offset = If continue_from_epoch_and_offset_flag is any number but zero, continue from this database offset.
continue_from_epoch_and_offset_flag = If any number but zero, continue from a certain epoch and offset. Used to continue training from a save point.
db_chunk = Data chunk to get from database at a time. Reduce if memory isn't sufficient.
[test_imagen]
video_path = v.mp4
time_to_wait_before_deleting_files = 180
audio_length_used = 24
model_filename = model_live
openl3_mode_options = stable or imagen
openl3_mode = imagen
folder = imagen-tests-folder
number_of_images = 3
age = 25
ethnicity = black
gender = man
language = English
[extractVggBlurred]
dbChunk = 20
boxBlurMin = 4
boxBlurMax = 14
gaussianBlurMin=2
gaussianBlurMax=6
python dbCreateAndPopulate.py
python dbCreateAdditional.py
python dbCreateFacesBlurred.py
python extractAudio.py
python extractFaces.py
until python extractPyannoteTitaNet.py
do
echo "Restarting"
sleep 2
done
until python extractAudioFeatures.py
do
echo "Restarting"
sleep 2
done
until python extractVggBlurred.py
do
echo "Restarting"
sleep 2
done
until python extractAudioTransformer.py
do
echo "Restarting"
sleep 2
done
train_imagen_all_u1.ipynb
train_imagen_all_u2.ipynb
pip uninstall protobuf
pip install protobuf==3.9.2
pip install protobuf
find . -maxdepth 1 -type f -print0 | head -z -n 10000 | xargs -0 -r -- cp -t "/media/gamal/Passport/Datasets/VoxCeleb2/Voxceleb2VQVAETrain" --