diff --git a/index.html b/index.html index d49000f..181d30d 100644 --- a/index.html +++ b/index.html @@ -6,7 +6,7 @@ content="We introduce token-wise consistency terms between the image content and object segmentation maps in training text-to-image models for enhanced multi-category instance composition and photorealism."> - TokenCompose: Grounding Diffusion with Token-level Supervision + TokenCompose: Text-to-Image Diffusion with Token-level Supervision @@ -64,8 +64,7 @@

- TokenCompose - : Grounding Diffusion with Token-level Supervision + TokenCompose: Text-to-Image Diffusion with Token-level Supervision

@@ -178,7 +177,7 @@

A Stable Diffusion - model finetuned with token-wise grounding objectives for enhanced multi-category instance composition and photorealism. + model finetuned with token-wise consistency terms for enhanced multi-category instance composition and photorealism.

@@ -303,7 +302,7 @@

- Given a training prompt that faithfully describes an image, we adopt a POS tagger and Grounded SAM to extract all binary segmentation maps of the image corresponding to noun tokens from the prompt. Then, we jointly optimize the denoising U-Net of the diffusion model with both its original denoising and our grounding objective. + Given a training prompt that faithfully describes an image, we adopt a POS tagger and Grounded SAM to extract all binary segmentation maps of the image corresponding to noun tokens from the prompt. Then, we jointly optimize the denoising U-Net of the diffusion model with both its original denoising objective and our token-wise objective.

@@ -1128,13 +1127,13 @@

-          @misc{wang2023tokencompose,
-            title={TokenCompose: Grounding Diffusion with Token-level Supervision}, 
-            author={Zirui Wang and Zhizhou Sha and Zheng Ding and Yilin Wang and Zhuowen Tu},
-            year={2023},
-            eprint={2312.03626},
-            archivePrefix={arXiv},
-            primaryClass={cs.CV}
+          @InProceedings{Wang2024TokenCompose,
+            author    = {Wang, Zirui and Sha, Zhizhou and Ding, Zheng and Wang, Yilin and Tu, Zhuowen},
+            title     = {TokenCompose: Text-to-Image Diffusion with Token-level Supervision},
+            booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+            month     = {June},
+            year      = {2024},
+            pages     = {8553--8564}
         }