index.html

<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8">
  <meta name="description"
        content="Deformable Neural Radiance Fields creates free-viewpoint portraits (nerfies) from casually captured videos.">
  <meta name="keywords" content="Nerfies, D-NeRF, NeRF">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>FairQueue: Rethinking Prompt Learning for Fair Text-to-Image Generation (NeurIPS24)</title>

  <!-- Global site tag (gtag.js) - Google Analytics -->
  <script async src="https://www.googletagmanager.com/gtag/js?id=G-PYVRSFMDRL"></script>
  <script>
    window.dataLayer = window.dataLayer || [];

    function gtag() {
      dataLayer.push(arguments);
    }

    gtag('js', new Date());

    gtag('config', 'G-PYVRSFMDRL');
  </script>

  <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro"
        rel="stylesheet">

  <link rel="stylesheet" href="./static/css/bulma.min.css">
  <link rel="stylesheet" href="./static/css/bulma-carousel.min.css">
  <link rel="stylesheet" href="./static/css/bulma-slider.min.css">
  <link rel="stylesheet" href="./static/css/fontawesome.all.min.css">
  <link rel="stylesheet"
        href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
  <link rel="stylesheet" href="./static/css/index.css">
  <!--<link rel="icon" href="./static/images/favicon.svg">-->

  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
  <script defer src="./static/js/fontawesome.all.min.js"></script>
  <script src="./static/js/bulma-carousel.min.js"></script>
  <script src="./static/js/bulma-slider.min.js"></script>
  <script src="./static/js/index.js"></script>
</head>
<body>

<!--
<nav class="navbar" role="navigation" aria-label="main navigation">
  <div class="navbar-brand">
    <a role="button" class="navbar-burger" aria-label="menu" aria-expanded="false">
      <span aria-hidden="true"></span>
      <span aria-hidden="true"></span>
      <span aria-hidden="true"></span>
    </a>
  </div>
  <div class="navbar-menu">
    <div class="navbar-start" style="flex-grow: 1; justify-content: center;">
      <a class="navbar-item" href="https://keunhong.com">
      <span class="icon">
          <i class="fas fa-home"></i>
      </span>
      </a>

      <div class="navbar-item has-dropdown is-hoverable">
        <a class="navbar-link">
          More Research
        </a>
        <div class="navbar-dropdown">
          <a class="navbar-item" href="https://hypernerf.github.io">
            HyperNeRF
          </a>
          <a class="navbar-item" href="https://nerfies.github.io">
            Nerfies
          </a>
          <a class="navbar-item" href="https://latentfusion.github.io">
            LatentFusion
          </a>
          <a class="navbar-item" href="https://photoshape.github.io">
            PhotoShape
          </a>
        </div>
      </div>
    </div>

  </div>
</nav>
-->


<!-- Author info -->
<section class="hero">
  <div class="hero-body">
    <div class="container is-max-desktop">
      <div class="columns is-centered">
        <div class="column has-text-centered">
          <h1 class="title is-1 publication-title">NeurIPS24: FairQueue: Rethinking Prompt Learning for Fair Text-to-Image Generation</h1>
          <div class="is-size-5 publication-authors">
            <span class="author-block">
              <a href="-">Christopher T. H. Teo</a><sup>1</sup>,</span>
            <span class="author-block">
              <a href="-">Milad Abdollahzadeh</a><sup>1</sup>,</span>
              <span class="author-block">
                <a href="-">Xinda Ma</a>,</span>
            <span class="author-block">
              <a href="-">Ngai-Man Cheung</a><sup>*</sup>,
            </span>
          </div>

          <div class="is-size-5 publication-authors">
            <span class="author-block"><sup>1</sup>Singapore Univeristy of Technology and Design (SUTD),</span>
          </div>

          <div class="column has-text-centered">
            <div class="publication-links">
              <!-- PDF Link. -->
              <span class="link-block">
                <a href="https://openreview.net/forum?id=3MW44iNdrD"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                      <i class="fas fa-file-pdf"></i>
                  </span>
                  <span>Paper</span>
                </a>
              </span>
              <span class="link-block">
                <a href="https://arxiv.org/abs/2410.18615"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                      <i class="ai ai-arxiv"></i>
                  </span>
                  <span>arXiv</span>
                </a>
              </span>
              <!-- Video Link. -->
              <!--
              <span class="link-block">
                <a href="-"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                      <i class="fab fa-youtube"></i>
                  </span>
                  <span>Video</span>
                </a>
              </span>
            -->
              <!-- Code Link. -->
              <span class="link-block">
                <a href="https://github.com/Bearwithchris/FairQueue_Code"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                      <i class="fab fa-github"></i>
                  </span>
                  <span>Code</span>
                  </a>
              </span>
            </div>

          </div>
        </div>
      </div>
    </div>
  </div>
</section>

<!--
<section class="hero teaser">
  <div class="container is-max-desktop">
    <div class="hero-body">
      <video id="teaser" autoplay muted loop playsinline height="100%">
        <source src="./static/videos/teaser.mp4"
                type="video/mp4">
      </video>
      <h2 class="subtitle has-text-centered">
        <span class="dnerf">Nerfies</span> turns selfie videos from your phone into
        free-viewpoint
        portraits.
      </h2>
    </div>
  </div>
</section>


<section class="hero is-light is-small">
  <div class="hero-body">
    <div class="container">
      <div id="results-carousel" class="carousel results-carousel">
        <div class="item item-steve">
          <video poster="" id="steve" autoplay controls muted loop playsinline height="100%">
            <source src="./static/videos/steve.mp4"
                    type="video/mp4">
          </video>
        </div>
        <div class="item item-chair-tp">
          <video poster="" id="chair-tp" autoplay controls muted loop playsinline height="100%">
            <source src="./static/videos/chair-tp.mp4"
                    type="video/mp4">
          </video>
        </div>
        <div class="item item-shiba">
          <video poster="" id="shiba" autoplay controls muted loop playsinline height="100%">
            <source src="./static/videos/shiba.mp4"
                    type="video/mp4">
          </video>
        </div>
        <div class="item item-fullbody">
          <video poster="" id="fullbody" autoplay controls muted loop playsinline height="100%">
            <source src="./static/videos/fullbody.mp4"
                    type="video/mp4">
          </video>
        </div>
        <div class="item item-blueshirt">
          <video poster="" id="blueshirt" autoplay controls muted loop playsinline height="100%">
            <source src="./static/videos/blueshirt.mp4"
                    type="video/mp4">
          </video>
        </div>
        <div class="item item-mask">
          <video poster="" id="mask" autoplay controls muted loop playsinline height="100%">
            <source src="./static/videos/mask.mp4"
                    type="video/mp4">
          </video>
        </div>
        <div class="item item-coffee">
          <video poster="" id="coffee" autoplay controls muted loop playsinline height="100%">
            <source src="./static/videos/coffee.mp4"
                    type="video/mp4">
          </video>
        </div>
        <div class="item item-toby">
          <video poster="" id="toby" autoplay controls muted loop playsinline height="100%">
            <source src="./static/videos/toby2.mp4"
                    type="video/mp4">
          </video>
        </div>
      </div>
    </div>
  </div>
</section>
-->

<section class="section">
  <div class="container is-max-desktop">
    <!-- Abstract. -->
    <div class="columns is-centered has-text-centered">
      <div class="column is-four-fifths">
        <h2 class="title is-3">Abstract</h2>
        <div class="content has-text-justified">
          <p>
            Recently, prompt learning has emerged as the state-of-the-art (SOTA) for fair text-
            to-image (T2I) generation. Specifically, this approach leverages readily available
            reference images to learn inclusive prompts for each target Sensitive Attribute (tSA),
            allowing for fair image generation. In this work, we first reveal that this prompt
            learning-based approach results in degraded sample quality. Our analysis shows that
            the approach’s training objective–which aims to align the embedding differences of
            learned prompts and reference images–could be sub-optimal, resulting in distortion
            of the learned prompts and degraded generated images.
            To further substantiate this claim, as our major contribution, we deep dive into
            the denoising subnetwork of the T2I model to track down the effect of these learned
            prompts by analyzing the cross-attention maps. In our analysis, we propose novel
            prompt switching analysis: I2H and H2I. Furthermore, we propose new quantitative
            characterization of cross-attention maps. Our analysis reveals abnormalities in
            the early denoising steps, perpetuating improper global structure that results in
            degradation in the generated samples. Building on insights from our analysis, we
            propose two ideas: (i) Prompt Queuing and (ii) Attention Amplification to address
            the quality issue. Extensive experimental results on a wide range of tSAs show
            that our proposed method outperforms SOTA approach’s image generation quality,
            while achieving competitive fairness.
          </p>
        </div>
      </div>
    </div>
    <!--/ Abstract. -->


<!-- Body -->
<div class="container is-max-desktop">

    <div class="columns is-centered">

      <!-- Visual Effects. -->
      <!-- <div class="column">
        <div class="content">
          <h2 class="title is-3">Visual Effects</h2>
          <p>
            Using <i>nerfies</i> you can create fun visual effects. This Dolly zoom effect
            would be impossible without nerfies since it would require going through a wall.
          </p>
          <video id="dollyzoom" autoplay controls muted loop playsinline height="100%">
            <source src="./static/videos/dollyzoom-stacked.mp4"
                    type="video/mp4">
          </video>
        </div>
      </div> -->
      <!--/ Visual Effects. -->

      <!-- Matting. -->
      <!-- <div class="column">
        <h2 class="title is-3">Matting</h2>
        <div class="columns is-centered">
          <div class="column content">
            <p>
              As a byproduct of our method, we can also solve the matting problem by ignoring
              samples that fall outside of a bounding box during rendering.
            </p>
            <video id="matting-video" controls playsinline height="100%">
              <source src="./static/videos/matting.mp4"
                      type="video/mp4">
            </video>
          </div>

        </div>
      </div>
    </div> -->
    <!--/ Matting. -->

    <!-- Animation. -->
    <div class="columns is-centered">
      <!-- <div class="column is-full-width"> -->
        <!-- <h2 class="title is-3">Animation</h2> -->

        <!-- Interpolating. -->
        <!-- <h3 class="title is-4">Interpolating states</h3>
        <div class="content has-text-justified">
          <p>
            We can also animate the scene by interpolating the deformation latent codes of two input
            frames. Use the slider here to linearly interpolate between the left frame and the right
            frame.
          </p>
        </div>
        <div class="columns is-vcentered interpolation-panel">
          <div class="column is-3 has-text-centered">
            <img src="./static/images/interpolate_start.jpg"
                 class="interpolation-image"
                 alt="Interpolate start reference image."/>
            <p>Start Frame</p>
          </div>
          <div class="column interpolation-video-column">
            <div id="interpolation-image-wrapper">
              Loading...
            </div>
            <input class="slider is-fullwidth is-large is-info"
                   id="interpolation-slider"
                   step="1" min="0" max="100" value="0" type="range">
          </div>
          <div class="column is-3 has-text-centered">
            <img src="./static/images/interpolate_end.jpg"
                 class="interpolation-image"
                 alt="Interpolation end reference image."/>
            <p class="is-bold">End Frame</p>
          </div>
        </div>
        <br/> -->
        <!--/ Interpolating. -->

        <!-- Re-rendering. -->
        <!-- <h3 class="title is-4">Re-rendering the input video</h3>
        <div class="content has-text-justified">
          <p>
            Using <span class="dnerf">Nerfies</span>, you can re-render a video from a novel
            viewpoint such as a stabilized camera by playing back the training deformations.
          </p>
        </div>
        <div class="content has-text-centered">
          <video id="replay-video"
                 controls
                 muted
                 preload
                 playsinline
                 width="75%">
            <source src="./static/videos/replay.mp4"
                    type="video/mp4">
          </video>
        </div> -->
        <!--/ Re-rendering. -->

      </div>
    </div>
    <!-- / Animation. -->


    <!-- Overview -->
    <div class="columns is-centered">
      <div class="column is-full-width">
        <h2 class="title is-3">Overview</h2>

        <div class="content has-text-justified">
			<center>
				<table align="center" width="880px">
					<tbody><tr>
						<td width="260px">
							<center>
								<img class="round" style="width:880px" src="./resources/Fig_1_test_v3_compressed.png">
							</center>
						</td>
					</tr>
				</tbody></table>
				<table align="center" width="880px">
					<tbody><tr>
						<td>
							<p style="text-align:justify; text-justify:inter-ideograph;">
                </p><h4 class="title is-5">Contributions</h4>
		We examine SOTA prompt learning methods that utilize directional alignment between prompt embeddings and reference image embeddings in fair T2I generation. Our study reveals two key issues:
		<ul>
			<li>The generation of a moderate number of samples with degraded quality.</li>
			<li>The potential for noisy reference image embeddings, as they may capture unrelated concepts beyond the target Sensitive Attribute (tSA), resulting in sub-optimal learning.</li>
		</ul>
		To address this, we propose a novel analysis framework (H2I/I2H) that scrutinizes the cross-attention maps during the denoising process of T2I generation. The analysis highlights abnormalities in the learned prompts, particularly in the early denoising steps. Based on these insights, we introduce FairQueue, an improved method that addresses quality issues while maintaining competitive fairness.

                <!--
							<b>1: </b>
              We conduct a study to reveal that even highly-accurate SA classifiers could still incur significant
              fairness measurement errors when using existing framework.
              <br> <br>
							<b>2: </b>
              To enable evaluation of fairness measurement frameworks, we propose new datasets based on
              generated samples from StyleGAN, StyleSwin and SDM, with manual labeling w.r.t. SA
              <br> <br>
              <b>3: </b>
              We propose a new and accurate fairness measurement framework, CLEAM, that accounts for SA
              classifier inaccuracies and provides point and interval estimates
              <br> <br>
              <b>4: </b>
              Using CLEAM, we reveal considerable biases in several important generative models, prompting
              careful consideration when applying them for different applications.
              <br>
            -->
          	</td>
					</tr>
				</tbody></table>
				<table align="center" width="880px">
					<tbody><tr>
						<td width="260px">
							<!-- <center>
								<img class="round" style="width:880px" src="./resources/method.jpg"/>
							</center> -->
						</td>
					</tr>
				</tbody></table>
			</center>
        </div>
      </div>
    </div>
    <!--/ Overview -->

  </div>


<!--Bibtex-->
    <section class="section" id="BibTeX">
      <div class="container is-max-desktop content">
        <h2 class="title">BibTeX</h2>
        <pre><code>
@misc{teo2024fairqueuerethinkingpromptlearning,
      title={FairQueue: Rethinking Prompt Learning for Fair Text-to-Image Generation}, 
      author={Christopher T. H Teo and Milad Abdollahzadeh and Xinda Ma and Ngai-man Cheung},
      year={2024},
      eprint={2410.18615},
      archivePrefix={arXiv},
      primaryClass={cs.CV},
      url={https://arxiv.org/abs/2410.18615}, 
}
</code></pre>
      </div>
    </section>

    <footer class="footer">
      <div class="container">
        <!-- <div class="content has-text-centered">
          <a class="icon-link"
             href="./static/videos/nerfies_paper.pdf">
            <i class="fas fa-file-pdf"></i>
          </a>
          <a class="icon-link" href="https://github.com/keunhong" class="external-link" disabled>
            <i class="fab fa-github"></i>
          </a>
        </div> -->
        <div class="columns is-centered">
          <div class="column is-8">
            <div class="content">
              <p>
                This website is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/">Creative
                Commons Attribution-ShareAlike 4.0 International License</a>.
              </p>
              <p>
                This project page is constructed using the wonderful template provided by <a href="https://nerfies.github.io/">Nerfies</a>.
              </p>
            </div>
          </div>
        </div>
      </div>
    </footer>

</body>
</html>