# generate_story.py

import os, tempfile, logging
import urllib
import urllib.request

import moviepy.editor as mp
from moviepy.audio.fx.volumex import volumex
import requests

import voice_generator
import zoom_effect

# Frame size that every fragment and image clip is resized to
# (presumably (width, height) in pixels -- confirm against moviepy's resize).
# Larger preset kept for reference:
# default_size = (1080, 1920)
default_size = (576, 1024)
# Default `size` passed to the caption TextClip in generate_story_fragment.
# Larger preset kept for reference:
# full_text_size = (720, 900)
full_text_size = (400, 600)
# Default for the `text_position` parameter of generate_story_fragment.
# NOTE(review): that function currently positions the caption at 'center'
# and does not read this value.
# Larger preset kept for reference:
# full_text_position = (140, 280)
full_text_position = (100, 150)

# Root directory for downloaded and generated assets. Can be overridden with
# the VIDEO_GENERATION_STORAGE_PATH environment variable.
# default_local_file_storage = "/storage"
default_local_file_storage = os.getenv('VIDEO_GENERATION_STORAGE_PATH', '/root/moviepyvideo/storage')

# Directory in which sound effects are looked up as '<lower-cased name>.mp3'.
sound_storage = os.path.join(default_local_file_storage, 'audio', 'sounds')

# When True, StoryGenerator.process_final_video_clip previews the final clip
# instead of writing an .mp4 file.
preview = False


def generate_story_fragment(
        text,
        voice_clip,
        music_clip,
        video_clip,
        duration,
        sound_clips=None,
        text_size=full_text_size,
        text_position=full_text_position,
):
    """Compose one captioned video fragment with its mixed audio track.

    Args:
        text: Caption rendered on top of the video.
        voice_clip: Narration audio for this fragment.
        music_clip: Background music slice for this fragment.
        video_clip: Background video; its first ``duration`` seconds are
            taken and resized to ``default_size``.
        duration: Length used for the video cut and for the caption.
        sound_clips: Optional iterable of extra audio clips mixed in after
            the voice and the music; ``None`` means no extra sounds.
        text_size: ``size`` given to the caption ``TextClip``.
        text_position: Accepted for interface compatibility; the body does
            not read it and always places the caption at ``'center'``.

    Returns:
        A ``CompositeVideoClip`` (background + caption) whose ``audio`` is a
        ``CompositeAudioClip`` of all supplied audio clips.
    """
    print('Generating video cut for text {}'.format(text))

    background = video_clip.subclip(0, duration)
    background = background.resize(default_size)

    print('text clip creation', text)

    caption_options = dict(
        txt=text,
        fontsize=30,
        bg_color='black',
        color='white',
        method='caption',
        size=text_size,
        stroke_width=1,
        stroke_color='white',
    )
    caption = mp.TextClip(**caption_options)
    caption = caption.set_opacity(0.7)
    caption = caption.set_position('center')
    caption = caption.set_duration(duration)
    # Transparent 20px border around the caption box.
    caption = caption.margin(top=20, bottom=20, left=20, right=20, opacity=0)

    fragment = mp.CompositeVideoClip([background, caption])

    # Voice and music always come first; optional effects are appended.
    audio_tracks = [voice_clip, music_clip]
    if sound_clips is not None:
        audio_tracks.extend(sound_clips)

    print('clips_audio', audio_tracks)

    fragment.audio = mp.CompositeAudioClip(audio_tracks)
    return fragment


class StoryGenerator:
    """Builds a narrated story video out of paragraphs, images and audio.

    For every text entry of every paragraph a voice-over is generated with
    ``voice_audio_generator``, combined with a slice of the background music,
    optional sound effects and a zooming image clip, and all resulting
    fragments are concatenated into one video.
    """

    def __init__(self,
                 voice_audio_generator,
                 local_storage=default_local_file_storage,
                 ):
        """Store collaborators and make sure the output directory exists.

        Args:
            voice_audio_generator: Object exposing
                ``generate_text_audio(text, speech, file_name)``.
            local_storage: Root storage directory; generated videos go to
                ``<local_storage>/video/generated``.
        """
        self.local_storage = local_storage
        self.voice_audio_generator = voice_audio_generator
        self.generated_video_file_storage = '{}/video/generated'.format(self.local_storage)

        os.makedirs(self.generated_video_file_storage, exist_ok=True)

    def generate_video(
            self,
            project_id,
            title,
            paragraphs,
            background_music,
            speech,
    ):
        """Generate the full story video and return the written file name.

        Args:
            project_id: Name of the sub-directory the video is written to.
            title: Base name (without extension) of the output file.
            paragraphs: Sequence of dicts with an ``'image'`` path and a
                ``'text'`` list; each text entry has ``'text'`` and an
                optional ``'sound'`` list of sound-effect names.
            background_music: Dict with the music ``'path'`` and an optional
                ``'start'`` offset (defaults to 0).
            speech: Passed through unchanged to the voice generator.

        Returns:
            Whatever ``process_final_video_clip`` returns: ``'<title>.mp4'``,
            or ``None`` in preview mode.

        Raises:
            ValueError: If no fragment could be built (no text entries).
        """
        fragments = []
        # Every clip that owns a file reader is tracked here so it can be
        # closed in ``finally``, also when generation fails part-way.
        opened_clips = []
        final_clip = None

        with tempfile.TemporaryDirectory() as temp_dir_name:
            print('temp dir {}'.format(temp_dir_name))
            try:
                music_source = mp.AudioFileClip(background_music['path'])
                opened_clips.append(music_source)
                music_clip = music_source.fx(volumex, 0.3)

                latest_duration_music = background_music.get('start', 0)

                for i, paragraph in enumerate(paragraphs):
                    print('start elements', i)
                    text_audios = self._generate_voice_clips(
                        i, paragraph, speech, temp_dir_name, opened_clips)
                    # Each text entry gets its voice duration plus one second.
                    video_duration = sum(clip.duration + 1 for clip in text_audios)

                    print('going to generate video from image')

                    video_clip = zoom_effect.clip_from_image(paragraph['image'], video_duration, size=default_size)
                    latest_duration_video = 0

                    print('going to generate video')

                    for j, entry in enumerate(paragraph['text']):
                        print('going to generate video part', j)
                        duration = text_audios[j].duration + 1
                        sound_clips = self._load_sound_clips(
                            entry.get('sound') or (), duration, opened_clips)

                        fragment = generate_story_fragment(
                            text=entry['text'],
                            voice_clip=text_audios[j],
                            music_clip=music_clip.subclip(latest_duration_music, latest_duration_music + duration),
                            video_clip=video_clip.subclip(latest_duration_video, latest_duration_video + duration),
                            duration=duration,
                            sound_clips=sound_clips or None,
                        )
                        fragments.append(fragment)

                        latest_duration_music += duration
                        latest_duration_video += duration

                if not fragments:
                    raise ValueError('no story fragments were generated: paragraphs contain no text entries')

                print('Concatenating fragments')

                final_clip = mp.concatenate_videoclips(fragments, method="compose")

                print('Saving the file')

                return self.process_final_video_clip(final_clip, project_id, title)
            finally:
                # Release readers before the temporary directory (which holds
                # the voice files) is removed on leaving the ``with`` block.
                pending = [] if final_clip is None else [final_clip]
                for clip in pending + opened_clips:
                    try:
                        clip.close()
                    except Exception as e:
                        # A failing close must not hide the original error.
                        logging.warning('failed to close clip %r: %s', clip, e)

    def _generate_voice_clips(self, paragraph_index, paragraph, speech, temp_dir_name, opened_clips):
        """Synthesize and open one voice clip per text entry of ``paragraph``."""
        voice_clips = []
        for j, entry in enumerate(paragraph['text']):
            print('start text', j)
            voice_file_name = '{}/{}_{}_voice.mp3'.format(temp_dir_name, paragraph_index, j)
            self.voice_audio_generator.generate_text_audio(entry['text'], speech, voice_file_name)
            voice_clip = mp.AudioFileClip(voice_file_name)
            opened_clips.append(voice_clip)
            voice_clips.append(voice_clip)
        return voice_clips

    def _load_sound_clips(self, sound_names, fragment_duration, opened_clips):
        """Load the named sound effects, trimmed to at most ``fragment_duration``."""
        sound_clips = []
        for sound_name in sound_names:
            # Best effort: a missing or unreadable effect is reported and skipped.
            try:
                print('sound_name', sound_name.lower())
                sound_source = mp.AudioFileClip(os.path.join(sound_storage, '{}.mp3'.format(sound_name.lower())))
                opened_clips.append(sound_source)
                sound_clip = sound_source.fx(volumex, 0.4)
                # Trim only when the effect really outlasts the fragment.
                # Cutting a shorter effect to ``fragment_duration`` would
                # declare a duration longer than the audio that exists.
                if sound_clip.duration > fragment_duration:
                    sound_clip = sound_clip.subclip(0, fragment_duration)

                sound_clips.append(sound_clip)
            except Exception as e:
                print('sound {} not found: {}'.format(sound_name, e))
        return sound_clips

    def process_final_video_clip(self, video_clip, project_id, title):
        """Preview the clip or write it to ``<storage>/<project_id>/<title>.mp4``.

        Returns:
            ``None`` when the module-level ``preview`` flag is set (the clip
            is previewed, nothing is written); otherwise ``'<title>.mp4'``,
            the file name relative to the project directory.
        """
        if preview:
            aud = video_clip.audio.set_fps(44100)
            preview_clip = video_clip.without_audio().set_audio(aud)
            preview_clip.preview()

            return None

        project_dir = "{}/{}".format(self.generated_video_file_storage, project_id)
        os.makedirs(project_dir, exist_ok=True)
        file_path = "{}/{}.mp4".format(project_dir, title)

        video_clip.write_videofile(
            file_path,
            codec='libx264',
            audio_codec='aac',
            temp_audiofile='temp-audio.m4a',
            remove_temp=True,
            threads=4,
            fps=24
        )

        return "{}.mp4".format(title)


def generate_story_video_from_config(videos_config):
    """Download the configured assets and generate the story video.

    Args:
        videos_config: Dict with ``'project_id'``, ``'title'``,
            ``'paragraphs'``, ``'background_sound'`` and ``'speech'``.
            It is updated in place so that remote asset URLs are replaced
            with local file paths.

    Returns:
        The value returned by ``StoryGenerator.generate_video``, or ``None``
        if generation failed (the failure is logged, not raised). Errors
        raised while downloading the assets are not caught here.
    """
    download_files_from_config_and_update_config(videos_config)

    text_voice_generator = voice_generator.VoiceGenerator()
    story_generator = StoryGenerator(text_voice_generator)
    try:
        return story_generator.generate_video(
            project_id=videos_config['project_id'],
            title=videos_config['title'],
            paragraphs=videos_config['paragraphs'],
            background_music=videos_config['background_sound'],
            speech=videos_config['speech'],
        )
    except Exception as e:
        # logging.exception records the traceback as well as the message,
        # so a failed generation can actually be diagnosed from the log.
        logging.exception('error during generation story video: {}'.format(e))
        return None


def download_files_from_config_and_update_config(videos_config):
    """Fetch remote images and background music, pointing the config at local copies.

    Every paragraph's ``'image'`` URL is downloaded to
    ``<storage>/images/<image_title>.png`` and the background music to
    ``<storage>/audio/background/<last URL path segment>``. Files already
    present on disk are reused without contacting the server.

    Args:
        videos_config: Dict with ``'paragraphs'`` (each having ``'image'``
            and ``'image_title'``) and ``'background_sound'`` (having
            ``'path'``). Mutated in place: the URLs are replaced with local
            file paths. If the music download does not return HTTP 200 the
            music path is left unchanged and an error is printed.
    """
    for paragraph in videos_config['paragraphs']:
        image_url = paragraph['image']
        image_title = paragraph['image_title']
        image_file_name = '{}/{}/{}.png'.format(default_local_file_storage, "images", image_title)
        print('image_file_name', image_file_name)
        if not os.path.exists(image_file_name):
            print('downloading image', image_url)
            # The target directory is not guaranteed to exist on a fresh storage root.
            os.makedirs(os.path.dirname(image_file_name), exist_ok=True)
            urllib.request.urlretrieve(image_url, image_file_name)

        paragraph['image'] = image_file_name

    music_url = videos_config['background_sound']['path']
    # File name = last path segment of the URL without its query string.
    last_part = music_url.split("/")[-1].split("?")[0]
    print('last_part', last_part)
    music_file_name = '{}/{}/{}/{}'.format(default_local_file_storage, "audio", "background", last_part)

    if os.path.exists(music_file_name):
        # Already cached: skip the network round trip entirely.
        videos_config['background_sound']['path'] = music_file_name
        return

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36",
    }

    # Without a timeout an unresponsive server would block forever.
    response = requests.get(music_url, headers=headers, timeout=60)

    if response.status_code == 200:
        os.makedirs(os.path.dirname(music_file_name), exist_ok=True)
        with open(music_file_name, "wb") as f:
            f.write(response.content)
        print("File downloaded successfully.")

        videos_config['background_sound']['path'] = music_file_name
    else:
        print("Error downloading file. Status code:", response.status_code)


    # print('music_file_name', music_file_name)
    # if not os.path.exists(music_file_name):
    #     print('downloading image', music_url)
    #     urllib.request.urlretrieve(music_url, music_file_name)

if __name__ == "__main__":
    # Manual run: generates one story video from the hard-coded sample
    # config below. The config's image and music URLs are fetched by
    # generate_story_video_from_config before the video is built.
    #
    # Alternative sample config (disabled):
    # config = {
    #     "project_id": "8c3de4dd",
    #     "title": "Test",
    #     "background_sound": {
    #         "path": "https://cdn.pixabay.com/download/audio/2022/01/18/audio_3ad65087c4.mp3?filename=thriller-ambient-14563.mp3",
    #         "start": 40,
    #     },
    #     "speech": {
    #         "language": "en-US",
    #         "voice": "Matthew"
    #     },
    #     "paragraphs": [
    #         {
    #             "image_title": "dark and foreboding mansion with cracked walls and overgrown foliage",
    #             "image": "https://replicate.delivery/pbxt/AHrvFfe3eEd9fSNhicwGnSKgFFyFkW8voegIqAmzox2e93nGE/out-0.png",
    #             "text": [
    #                 {
    #                     "sound": ["thunder"],
    #                     "text": "The storm was loud and scary, and I couldn't help but feel like I was being watched as I walked up the long driveway to the mansion."
    #                 },
    #                 {
    #                     "text": "The air was heavy with the smell of decay and must, and I couldn't shake the feeling that something terrible was waiting for me inside.",
    #                 },
    #                 {
    #                     "sound": ["door creek"],
    #                     "text": "As I approached the front door, I hesitated for a moment, wondering if I should turn back.",
    #                 },
    #                 {
    #                     "text": "But my curiosity won out and I pushed the door open, stepping into the darkness beyond.",
    #                 },
    #             ]
    #         },
    #         {
    #             "image_title": "creepy hallway with broken chandelier and shadowy figures",
    #             "image": "https://replicate.delivery/pbxt/WtCfp8htkg2yEK0osaCeRH8vbW3FMgSPVaqRTE9yrimwfepBB/out-0.png",
    #             "text": [
    #                 {
    #                     "text": "As I moved further into the mansion, the air grew colder and the shadows seemed to stretch out and claw at me.",
    #                 },
    #                 {
    #                     "sound": ["concrete footsteps"],
    #                     "text": "The sound of my own footsteps echoing off the walls did little to comfort me, and I couldn't shake the feeling that I was being followed.",
    #                 },
    #                 {
    #                     "sound": ["whispers"],
    #                     "text": "I could hear faint whispers in the distance"
    #                 },
    #                 {
    #                     "sound": ["dark sitar"],
    #                     "text": "and every once in a while, the sound of a dark sitar would drift through the air, sending shivers down my spine.",
    #                 }
    #             ]
    #         },
    #     ]
    # }

    # Active sample config. "sound" entries name effect files that are looked
    # up as '<name>.mp3' under the sound storage directory.
    config = {
        "project_id": "8c3de4dd",
        "title": "7c87bbd0-a06f-41b1-ba61-ae1faecb9e3d",
        "background_sound": {
            "path": "https://cdn.pixabay.com/download/audio/2022/01/18/audio_3ad65087c4.mp3?filename=thriller-ambient-14563.mp3",
            "start": 40
        },
        "speech": {
            "language": "en-US",
            "voice": "Matthew"
        },
        "paragraphs": [
            {
                "image_title": "79e9e1f1-674b-446e-bd51-2aa3ec0bc64c",
                "image": "https://replicate.delivery/pbxt/DezllqyaMvzcTy1yZWU9WOvtIwoXKeoHNo47ovNremKVDH1gA/out-1.png",
                "text": [
                    {
                        "sound": [
                            "monster growl"
                        ],
                        "text": "The witch in the forest was known to hunt random tourists and eat them."
                    },
                    {
                        "text": "People who stayed too long in the area vanished without a trace."
                    },
                    {
                        "text": "It was said that no one could escape her."
                    },
                    {
                        "text": "Even the bravest adventurers were no match for her."
                    }
                ]
            },
            {
                "image_title": "4cd47a75-ffa1-46fa-94d9-eebee84320fe",
                "image": "https://replicate.delivery/pbxt/6HXE8Lg0R2pwPpD92pfse9RHue457eTPfmP6qVP9agtKRcUDC/out-0.png",
                "text": [
                    {
                        "sound": [
                            "squeak"
                        ],
                        "text": "The locals whispered about the witch and the curses she laid upon those who trespassed her forest."
                    },
                    {
                        "sound": [
                            "concrete footsteps"
                        ],
                        "text": "She was a powerful creature, and her appetite for human flesh was insatiable."
                    },
                    {
                        "sound": [
                            "thunder"
                        ],
                        "text": "Every night she could be heard howling in the darkness."
                    }
                ]
            },
            {
                "image_title": "6449a781-4a6f-481a-a2dc-c62ff50ac655",
                "image": "https://replicate.delivery/pbxt/Tzz1jyCM9fVfq0uvQlrIZ8z6Q4scR7tjADM90QR14f1QFH1gA/out-0.png",
                "text": [
                    {
                        "sound": [
                            "wind"
                        ],
                        "text": "The witch was an ancient spirit, and her presence filled the forest with dread."
                    },
                    {
                        "sound": [
                            "possessed laugh"
                        ],
                        "text": "Her eyes glowed red in the night, and her cackling laughter echoed through the trees."
                    },
                    {
                        "sound": [
                            "glass smash"
                        ],
                        "text": "She was a creature of pure evil, and it seemed no one could stop her."
                    }
                ]
            }
        ]
    }

    generate_story_video_from_config(config)