Magic_clip_311_code.py
# Please support 311_code on https://ko-fi.com/311_code
import torch
from nodes import MAX_RESOLUTION
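# ComfyUI custom node: encodes the two SDXL prompts (text_g for the
# OpenCLIP-G encoder, text_l for the CLIP-L encoder) into a single
# CONDITIONING output, with per-encoder strength scaling, a size-conditioning
# upscale factor, and a selectable CLIP stop layer (clip skip).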
class CLIPTextEncodeSDXL_311_code:
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "width": ("INT", {"default": 1024, "min": 0, "max": MAX_RESOLUTION}),
            "height": ("INT", {"default": 1024, "min": 0, "max": MAX_RESOLUTION}),
            "text_g": ("STRING", {"multiline": True, "dynamicPrompts": True}),
            "text_l": ("STRING", {"multiline": True, "dynamicPrompts": True}),
            "clip": ("CLIP", ),
            "clip_g_strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1000.0, "step": 0.01}),
            "clip_l_strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1000.0, "step": 0.01}),
            "size_cond_factor": ("INT", {"default": 4, "min": 1, "max": 16}),
            "layer_idx": ("INT", {"default": -2, "min": -33, "max": 33}),  # CLIP stop layer (clip skip)
        }}
    RETURN_TYPES = ("CONDITIONING",)
    FUNCTION = "execute"
    CATEGORY = "essentials"

    def execute(self, clip, width, height, size_cond_factor, text_g, text_l, clip_g_strength, clip_l_strength, layer_idx):
        width = width * size_cond_factor
        height = height * size_cond_factor

        # Modify CLIP layers based on `layer_idx`
        clip = clip.clone()  # clone so the shared clip model is not mutated
        clip.clip_layer(layer_idx)  # set the stopping layer for the CLIP model
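        # Note (assumption, mirroring ComfyUI's CLIPSetLastLayer semantics):
        # negative values count back from the final layer, so the default -2
        # corresponds to the common "clip skip 2" setting.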
        # Tokenize both texts, or use empty tokens if both are empty
        tokens_g = clip.tokenize(text_g) if text_g.strip() else clip.tokenize("")
        tokens_l = clip.tokenize(text_l) if text_l.strip() else clip.tokenize("")

        # Ensure equal length by padding the shorter side with empty tokens
        if len(tokens_l["l"]) != len(tokens_g["g"]):
            empty_g = clip.tokenize("")["g"]
            empty_l = clip.tokenize("")["l"]
            while len(tokens_l["l"]) < len(tokens_g["g"]):
                tokens_l["l"] += empty_l
            while len(tokens_g["g"]) < len(tokens_l["l"]):
                tokens_g["g"] += empty_g

        # Encode both token streams together
        tokens = {"g": tokens_g["g"], "l": tokens_l["l"]}
        cond_combined, pooled_combined = clip.encode_from_tokens(tokens, return_pooled=True)
        # Apply per-encoder strengths. Assumption: ComfyUI's SDXL clip model
        # concatenates CLIP-L (768 dims) then OpenCLIP-G (1280 dims) along the
        # last axis, so l occupies [..., :768] and g occupies [..., 768:].
        # (Scaling the full tensor by both strengths would double-apply them.)
        if cond_combined.dim() == 3:
            if text_g.strip():  # only scale the g portion if text_g is non-empty
                cond_combined[:, :, 768:] *= clip_g_strength
            if text_l.strip():  # only scale the l portion if text_l is non-empty
                cond_combined[:, :, :768] *= clip_l_strength

        # Return the conditioning even if both texts were empty
        return ([[cond_combined, {"pooled_output": pooled_combined, "width": width, "height": height}]], )
NODE_CLASS_MAPPINGS = {
    "CLIPTextEncodeSDXL_311_code": CLIPTextEncodeSDXL_311_code
}

NODE_DISPLAY_NAME_MAPPINGS = {
    "CLIPTextEncodeSDXL_311_code": "311_code SDXL Clip Text Encode"
}
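# Usage note (assuming a standard ComfyUI install): place this file in
# ComfyUI/custom_nodes/ and restart ComfyUI; the node then appears in the
# "essentials" category as "311_code SDXL Clip Text Encode".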