# demo.py

#!/usr/bin/env python
from __future__ import annotations
import os
import sys
from pathlib import Path
current_file_path = Path(__file__).resolve()
sys.path.insert(0, str(current_file_path.parent.parent))
import random
import gradio as gr
import numpy as np
import uuid
from diffusers import StableDiffusionXLPipeline, EulerDiscreteScheduler
import torch
from typing import Tuple
from datetime import datetime
import argparse

# Markdown shown through gr.Markdown as the page header (logo, title, links).
DESCRIPTION = """![Logo](https://dl.dropbox.com/scl/fi/e2niisp985i40p7hww0u8/github_logo_v2.png?rlkey=q9bf1qtigka8bdbqmfjbc2rlu&st=45wxhwrd&dl=1)
        # KOALA-Lightning-700M 1024px
        #### [KOALA](https://youngwanlee.github.io/KOALA) is an efficient text-to-image diffusion model distilled from Stable Diffusion XL. This demo is generated by [KOALA-Lightning-700M](https://huggingface.co/etri-vilab/koala-lightning-700m) checkpoint.
        [[Project Page](https://youngwanlee.github.io/KOALA/)] [[Code](https://github.com/youngwanLEE/sdxl-koala)] | 📚 [[KOALA](https://arxiv.org/abs/2312.04005)]
        """

# Markdown shown through gr.Markdown as the funding acknowledgement
# (English text followed by the Korean original).
ACKNOWLEDGEMENT = """
### Acknowledgement  
This work was supported by the Institute of Information & communications Technology Planning & Evaluation (IITP) grant funded by the Korea government (MSIT) (No. RS-2022-00187238, Development of Large Korean Language Model Technology for Efficient Pre-training), 이 연구는 2023년도 정부(과학기술정보통신부)의 재원으로 정보통신기획평가원의 지원을 받아 수행된 연구임 (No. RS-2022-00187238, 효율적 사전학습이 가능한 한국어 대형 언어모델 사전학습 기술 개발)
"""

# Append a warning to the page header when torch reports no CUDA device.
if not torch.cuda.is_available():
    DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"

# Generic negative prompt text.
NEG_PROMPT = 'deformed iris, deformed pupils, deformed mouse, worst  quality, low quality, ugly, duplicate, morbid,  mutilated, extra fingers, mutated hands, poorly drawn hands, poorly  drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad  proportions, extra limbs, cloned face, disfigured, gross proportions,  malformed limbs, missing arms, missing legs BadDream'
# Largest value representable as a signed 32-bit integer; upper bound for seeds.
MAX_SEED = np.iinfo(np.int32).max
# Example caching is hard-disabled; the environment-driven variant is kept
# below for reference.
# CACHE_EXAMPLES = torch.cuda.is_available() and os.getenv("CACHE_EXAMPLES", "1") == "1"
CACHE_EXAMPLES = False
# Integer read from the MAX_IMAGE_SIZE environment variable (default 2048).
MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "2048"))
# Compile-time switch for wrapping the UNet in torch.compile.
USE_TORCH_COMPILE = False
# True only when the ENABLE_CPU_OFFLOAD environment variable is exactly "1".
ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", "0") == "1"
# Default server port, read from the DEMO_PORT environment variable (default 8000).
PORT = int(os.getenv("DEMO_PORT", "8000"))

# First CUDA device when available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Style presets. Each entry has a display "name", a "prompt" template in which
# the literal "{prompt}" placeholder marks where the user's text goes, and a
# "negative_prompt" associated with that style.
style_list = [
    {
        "name": "(No style)",
        "prompt": "{prompt}",
        "negative_prompt": "deformed iris, deformed pupils, deformed nose, deformed mouse, cropped, out of frame, worst  quality, low quality, jpeg artifacts, ugly, duplicate, morbid,  mutilated, extra fingers, mutated hands, poorly drawn hands, poorly  drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad  proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs BadDream",
    },
    {
        "name": "Cinematic",
        "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy",
        "negative_prompt": "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured",
    },
    {
        "name": "Photographic",
        "prompt": "cinematic photo {prompt} . 35mm photograph, film, bokeh, professional, 4k, highly detailed",
        "negative_prompt": "drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly",
    },
    {
        "name": "Anime",
        "prompt": "anime artwork {prompt} . anime style, key visual, vibrant, studio anime,  highly detailed",
        "negative_prompt": "photo, deformed, black and white, realism, disfigured, low contrast",
    },
    {
        "name": "Manga",
        "prompt": "manga style {prompt} . vibrant, high-energy, detailed, iconic, Japanese comic style",
        "negative_prompt": "ugly, deformed, noisy, blurry, low contrast, realism, photorealistic, Western comic style",
    },
    {
        "name": "Digital Art",
        "prompt": "concept art {prompt} . digital artwork, illustrative, painterly, matte painting, highly detailed",
        "negative_prompt": "photo, photorealistic, realism, ugly",
    },
    {
        "name": "Pixel art",
        "prompt": "pixel-art {prompt} . low-res, blocky, pixel art style, 8-bit graphics",
        "negative_prompt": "sloppy, messy, blurry, noisy, highly detailed, ultra textured, photo, realistic",
    },
    {
        "name": "Fantasy art",
        "prompt": "ethereal fantasy concept art of  {prompt} . magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy",
        "negative_prompt": "photographic, realistic, realism, 35mm film, dslr, cropped, frame, text, deformed, glitch, noise, noisy, off-center, deformed, cross-eyed, closed eyes, bad anatomy, ugly, disfigured, sloppy, duplicate, mutated, black and white",
    },
    {
        "name": "Neonpunk",
        "prompt": "neonpunk style {prompt} . cyberpunk, vaporwave, neon, vibes, vibrant, stunningly beautiful, crisp, detailed, sleek, ultramodern, magenta highlights, dark purple shadows, high contrast, cinematic, ultra detailed, intricate, professional",
        "negative_prompt": "painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured",
    },
    {
        "name": "3D Model",
        "prompt": "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting",
        "negative_prompt": "ugly, deformed, noisy, low poly, blurry, painting",
    },
]

# Index the presets by display name -> (prompt template, negative prompt).
styles = {
    preset["name"]: (preset["prompt"], preset["negative_prompt"])
    for preset in style_list
}
# Preset names in definition order.
STYLE_NAMES = [*styles]
# Preset used as the default selection and as the fallback for unknown names.
DEFAULT_STYLE_NAME = "(No style)"
# Number of images requested from the pipeline per prompt.
NUM_IMAGES_PER_PROMPT = 1

def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str, str]:
    """Expand a style preset around the user's prompt.

    Args:
        style_name: Key into ``styles``; an unknown name falls back to the
            ``DEFAULT_STYLE_NAME`` preset.
        positive: User prompt substituted for the ``{prompt}`` placeholder
            in the preset's template.
        negative: Optional extra negative prompt. ``None`` or an empty
            string means "nothing to add".

    Returns:
        A ``(prompt, negative_prompt)`` tuple. The negative prompt is the
        preset's negative text followed by ``negative``, separated by
        ``", "`` when both are non-empty.
    """
    template, style_negative = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
    # Join with an explicit separator: plain concatenation would fuse the
    # preset's last term with the caller's first one ("...BadDreamblurry").
    negative_terms = [part for part in (style_negative, negative) if part]
    return template.replace("{prompt}", positive), ", ".join(negative_terms)

# Load the KOALA-Lightning pipeline once at import time.
# `pipe` is pre-bound so the name always exists, even if loading fails below;
# the failure is still only logged (best effort), matching the original intent.
pipe = None
try:
    # fp16 safetensors weights of the 700M checkpoint.
    pipe = StableDiffusionXLPipeline.from_pretrained(
        "etri-vilab/koala-lightning-700m",
        torch_dtype=torch.float16,
        use_safetensors=True,
        variant="fp16"
    )
    if ENABLE_CPU_OFFLOAD:
        # Honour the ENABLE_CPU_OFFLOAD flag: let diffusers move sub-models to
        # the accelerator on demand instead of placing the whole pipeline there.
        pipe.enable_model_cpu_offload()
    else:
        pipe = pipe.to(device)
    print("Model loaded successfully!")

    pipe.enable_attention_slicing()
    torch.cuda.empty_cache()

    if USE_TORCH_COMPILE:
        pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
        print("Model Compiled!")

    # Swap in an Euler scheduler built from the current scheduler's config,
    # using "trailing" timestep spacing.
    pipe.scheduler = EulerDiscreteScheduler.from_config(
        pipe.scheduler.config, timestep_spacing="trailing"
    )
except Exception as e:
    print(f"Error loading model: {e}")

def save_image(prompt, seed, img):
    """Write ``img`` to a dated output folder under a random PNG file name.

    The file goes to ``output/online_demo_img/<today's date>/<uuid4>.png``
    relative to the current working directory; the folder is created when
    missing. ``prompt`` and ``seed`` are accepted but not used here.

    Returns:
        The absolute path of the written file, or ``None`` when anything in
        the process raised (the error is printed, not propagated).
    """
    try:
        day_dir = os.path.join('output', 'online_demo_img', str(datetime.now().date()))
        file_name = f"{uuid.uuid4()}.png"
        os.makedirs(day_dir, exist_ok=True)

        target = os.path.join(os.path.abspath(day_dir), file_name)
        img.save(target)
        print(f"Image saved successfully at: {target}")
        return target
    except Exception as e:
        print(f"Error saving image: {e}")
        return None

def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
    """Return a random seed in ``[0, MAX_SEED]`` when asked, else ``seed`` unchanged."""
    return random.randint(0, MAX_SEED) if randomize_seed else seed

def generate(
        prompt: str,
        style: str = DEFAULT_STYLE_NAME,
        seed: int = 0,
        guidance_scale: float = 3.5,
        inference_steps: int = 10,
        randomize_seed: bool = False,
        use_resolution_binning: bool = True,
        progress=gr.Progress(track_tqdm=True),
):
    """Run the pipeline for one prompt and save the resulting images to disk.

    Args:
        prompt: User prompt; it is wrapped by the selected style preset.
        style: Name of the style preset passed to ``apply_style``.
        seed: Seed for the torch generator (replaced when ``randomize_seed``).
        guidance_scale: Forwarded to the pipeline as ``guidance_scale``.
        inference_steps: Forwarded to the pipeline as ``num_inference_steps``.
        randomize_seed: Draw a new seed via ``randomize_seed_fn`` when true.
        use_resolution_binning: Forwarded to the pipeline unchanged.
            NOTE(review): confirm the loaded pipeline actually honours it.
        progress: Gradio progress tracker; not used directly in the body.

    Returns:
        ``(image_paths, seed)`` where ``image_paths`` is the list of saved
        files (``[]`` when none could be saved) or ``None`` when generation
        raised. Errors are printed rather than propagated.
    """
    try:
        print(f"Starting generation with prompt: {prompt}")
        seed = int(randomize_seed_fn(seed, randomize_seed))
        rng = torch.Generator().manual_seed(seed)

        # Output is always a 1024x1024 square.
        side = 1024

        styled_prompt, styled_negative = apply_style(style, prompt)

        outputs = pipe(
            prompt=styled_prompt,
            negative_prompt=styled_negative,
            width=side,
            height=side,
            guidance_scale=guidance_scale,
            num_inference_steps=inference_steps,
            generator=rng,
            num_images_per_prompt=NUM_IMAGES_PER_PROMPT,
            use_resolution_binning=use_resolution_binning,
            output_type="pil",
        ).images
        print("Image generation successful!")

        # Keep only the images that were actually written to disk.
        saved_paths = []
        for pil_image in outputs:
            try:
                saved = save_image(styled_prompt, seed, pil_image)
            except Exception as e:
                print(f"Error saving image: {e}")
                continue
            if saved:
                saved_paths.append(saved)

        print(f"Generated image paths: {saved_paths}")
        if saved_paths:
            return saved_paths, seed

        print("No valid images generated")
        return [], seed

    except Exception as e:
        print(f"Generation error: {e}")
        return None, seed

# Sample prompts offered through gr.Examples; each entry fills the prompt box.
# The strings are model input and are kept exactly as written.
examples = [
    'portrait photo of a standing girl, photograph, golden hair, depth of field, moody light, golden hour, style by Dan Winters, Russell James, Steve McCurry, centered, extremely detailed, Nikon D850, award winning photography',
    'professional portrait photo of an anthropomorphic cat wearing fancy gentleman hat and jacket walking in autumn forest.',
    'cute toy owl made of suede, geometric accurate, relief on skin, plastic relief surface of body, intricate details, cinematic',
    'Albert Einstein in a surrealist Cyberpunk 2077 world, hyperrealistic',
    'highly detailed albert einstein playing minecraft, epic laboratory office, shelves with detailed items in background, ((long shot)), highly detailed realistic painting by grandmaster, unreal engine, octane render, 4k, by artgerm and Drew Struzan and Krenz Cushart, trending on artstation',
    'Cute darth vader style minion, holidays in Paris, unreal engine, octane render, 4k',
    'Cute small dog sitting in a movie theater eating popcorn watching a movie ,unreal engine, cozy indoor lighting, artstation, detailed, digital painting,cinematic,character design by mark ryden and pixar and hayao miyazaki, unreal 5, daz, hyperrealistic, octane render',
    'higly detailed, majestic royal tall ship on a calm sea,realistic painting, by Charles Gregory Artstation and Antonio Jacobsen and Edward Moran, (long shot), clear blue sky, intricated details, 4k',
    'electronik robot and ofice ,unreal engine, cozy indoor lighting, artstation, detailed, digital painting,cinematic,character design by mark ryden and pixar and hayao miyazaki, unreal 5, daz, hyperrealistic, octane render',
    'Astronaut in a jungle, cold color palette, muted colors, detailed, 8k',
]

# Build the Gradio UI: header, prompt row + result gallery, advanced options,
# example prompts, acknowledgement, and the two event handlers that call
# `generate`. The CSS keeps gallery images inside their container.
with gr.Blocks(css="""
    #result-gallery img {
        max-width: 100%;
        max-height: 100%;
        object-fit: contain;
    }
""") as demo:
    gr.Markdown(DESCRIPTION)
    # Optional button, shown only when SHOW_DUPLICATE_BUTTON is exactly "1".
    if os.getenv("SHOW_DUPLICATE_BUTTON") == "1":
        gr.Button("Duplicate Space for private use", elem_id="duplicate-button")

    with gr.Group():
        with gr.Row():
            # Single-line prompt input next to the Run button.
            prompt = gr.Textbox(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )
            run_button = gr.Button("Run", scale=0)
            
        result = gr.Gallery(
            label="Result",
            columns=NUM_IMAGES_PER_PROMPT,
            show_label=False,
            height="auto",
            object_fit="contain",
            elem_id="result-gallery"  # ID added so the CSS rule above can target the gallery
        )

    with gr.Accordion("Advanced options", open=False):
        style_selection = gr.Radio(
            show_label=True,
            container=True,
            interactive=True,
            choices=STYLE_NAMES,
            value=DEFAULT_STYLE_NAME,
            label="Image Style",
        )
        seed = gr.Slider(
            label="Seed",
            minimum=0,
            maximum=MAX_SEED,
            step=1,
            value=0,
        )
        # Checked by default, so the slider value is replaced by a random seed.
        randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
        with gr.Row():
            guidance_scale = gr.Slider(
                label="Guidance scale",
                minimum=1,
                maximum=10,
                step=0.5,
                value=3.5,
            )
            steps = gr.Slider(
                label="Denoising Steps",
                minimum=8,
                maximum=50,
                step=1,
                value=10,
            )

    # Only the prompt is supplied as input here, so `generate` runs with its
    # own defaults for every other argument.
    gr.Examples(
        examples=examples,
        inputs=prompt,
        outputs=[result, seed],
        fn=generate,
        cache_examples=CACHE_EXAMPLES,
        api_name="examples"
    )
    
    gr.Markdown(ACKNOWLEDGEMENT)

    # Pressing Enter in the prompt box and clicking Run trigger the same call;
    # the seed output writes the seed actually used back into the slider.
    prompt.submit(
        fn=generate,
        inputs=[prompt, style_selection, seed, guidance_scale, steps, randomize_seed],
        outputs=[result, seed],
        api_name="run_prompt"
    )
    
    run_button.click(
        fn=generate,
        inputs=[prompt, style_selection, seed, guidance_scale, steps, randomize_seed],
        outputs=[result, seed],
        api_name="run_button"
    )

if __name__ == "__main__":
    # Command-line overrides for the bind address and port.
    cli = argparse.ArgumentParser()
    cli.add_argument("--ip", type=str, default=None, help="IP address to launch the Gradio app.")
    cli.add_argument("--port", type=int, default=PORT, help="Port number to use for the Gradio app.")
    args = cli.parse_args()

    shown_host = args.ip or 'localhost'
    print(f"Launching Gradio app on IP: {shown_host}, Port: {args.port}")  # debug log

    # With an explicit IP, bind to it; otherwise launch with share=True.
    launch_options = {"server_port": args.port, "show_error": True}
    if args.ip:
        launch_options["server_name"] = args.ip
    else:
        launch_options["share"] = True
    demo.launch(**launch_options)