Diffutoon-ExVideo

Runtime error

App Files Community

vilarin committed on Jun 24, 2024

Commit

96fa82a

verified ·

1 Parent(s): 2ba49a8

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -66

app.py CHANGED Viewed

@@ -10,7 +10,8 @@ from glob import glob
 from pathlib import Path
 from typing import Optional
-from diffusers import StableVideoDiffusionPipeline, UNetSpatioTemporalConditionControlNetModel
 from diffusers.utils import load_image, export_to_video
 import uuid
@@ -20,9 +21,6 @@ from huggingface_hub import hf_hub_download
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 # Constants
-base = "stabilityai/stable-video-diffusion-img2vid-xt"
-model = "ECNU-CILab/ExVideo-SVD-128f-v1"
 MAX_SEED = np.iinfo(np.int32).max
 CSS = """
@@ -38,30 +36,15 @@ JS = """function () {
   }
 }"""
-downloaded_model_path = hf_hub_download(
-        repo_id=model,
-        filename=model.fp16.safetensors,
-        local_dir="model"
-)
-MODEL_PATH = "./model/"
 # Ensure model and scheduler are initialized in GPU-enabled function
 if torch.cuda.is_available():
-    unet = UNetSpatioTemporalConditionControlNetModel.from_pretrained(
-        MODEL_PATH,
-        low_cpu_mem_usage=True,
-        variant="fp16",
-    )
-    pipe = StableVideoDiffusionPipeline.from_pretrained(
-    base,
-    unet=unet,
-    torch_dtype=torch.float16,
-    variant="fp16").to("cuda")
 # function source codes modified from multimodalart/stable-video-diffusion
 @spaces.GPU(duration=120)
@@ -69,11 +52,7 @@ def generate(
     image: Image,
     seed: Optional[int] = -1,
     motion_bucket_id: int = 127,
-    fps_id: int = 6,
-    version: str = "svd_xt",
-    cond_aug: float = 0.02,
-    decoding_t: int = 1,
-    device: str = "cuda",
     output_folder: str = "outputs",
     progress=gr.Progress(track_tqdm=True)):
@@ -83,49 +62,29 @@ def generate(
     if image.mode == "RGBA":
         image = image.convert("RGB")
-    generator = torch.manual_seed(seed)
     os.makedirs(output_folder, exist_ok=True)
     base_count = len(glob(os.path.join(output_folder, "*.mp4")))
     video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")
-    frames = pipe(image, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=0.1, num_frames=25).frames[0]
     export_to_video(frames, video_path, fps=fps_id)
-    torch.manual_seed(seed)
     return video_path, seed
-def resize_image(image, output_size=(1024, 576)):
-    # Calculate aspect ratios
-    target_aspect = output_size[0] / output_size[1]  # Aspect ratio of the desired size
-    image_aspect = image.width / image.height  # Aspect ratio of the original image
-    # Resize then crop if the original image is larger
-    if image_aspect > target_aspect:
-        # Resize the image to match the target height, maintaining aspect ratio
-        new_height = output_size[1]
-        new_width = int(new_height * image_aspect)
-        resized_image = image.resize((new_width, new_height), Image.LANCZOS)
-        # Calculate coordinates for cropping
-        left = (new_width - output_size[0]) / 2
-        top = 0
-        right = (new_width + output_size[0]) / 2
-        bottom = output_size[1]
-    else:
-        # Resize the image to match the target width, maintaining aspect ratio
-        new_width = output_size[0]
-        new_height = int(new_width / image_aspect)
-        resized_image = image.resize((new_width, new_height), Image.LANCZOS)
-        # Calculate coordinates for cropping
-        left = 0
-        top = (new_height - output_size[1]) / 2
-        right = output_size[0]
-        bottom = (new_height + output_size[1]) / 2
-    # Crop the image
-    cropped_image = resized_image.crop((left, top, right, bottom))
-    return cropped_image
 examples = [
         "./train.jpg",
@@ -162,7 +121,7 @@ with gr.Blocks(css=CSS, js=JS, theme="soft") as demo:
                 fps_id = gr.Slider(
                     label="Frames per second",
                     info="The length of your video in seconds will be 25/fps",
-                    value=6,
                     minimum=5,
                     maximum=30
                 )
@@ -178,8 +137,6 @@ with gr.Blocks(css=CSS, js=JS, theme="soft") as demo:
         examples_per_page=4,
     )
-    image.upload(fn=resize_image, inputs=image, outputs=image, queue=False)
     generate_btn.click(fn=generate, inputs=[image, seed, motion_bucket_id, fps_id], outputs=[video, seed], api_name="video")
 demo.queue().launch()

 from pathlib import Path
 from typing import Optional
+from diffsynth import ModelManager, SVDVideoPipeline, HunyuanDiTImagePipeline
+from diffsynth import ModelManager
 from diffusers.utils import load_image, export_to_video
 import uuid
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 # Constants
 MAX_SEED = np.iinfo(np.int32).max
 CSS = """
   }
 }"""
 # Ensure model and scheduler are initialized in GPU-enabled function
 if torch.cuda.is_available():
+    model_manager = ModelManager(
+        torch_dtype=torch.float16,
+        device="cuda",
+        model_id_list=["stable-video-diffusion-img2vid-xt", "ExVideo-SVD-128f-v1"])
+    pipe = SVDVideoPipeline.from_model_manager(model_manager)
 # function source codes modified from multimodalart/stable-video-diffusion
 @spaces.GPU(duration=120)
     image: Image,
     seed: Optional[int] = -1,
     motion_bucket_id: int = 127,
+    fps_id: int = 25,
     output_folder: str = "outputs",
     progress=gr.Progress(track_tqdm=True)):
     if image.mode == "RGBA":
         image = image.convert("RGB")
+    torch.manual_seed(seed)
     os.makedirs(output_folder, exist_ok=True)
     base_count = len(glob(os.path.join(output_folder, "*.mp4")))
     video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")
+    frames = pipe(
+        input_image=image.resize((512, 512)),
+        num_frames=128,
+        fps=fps_id,
+        height=512,
+        width=512,
+        motion_bucket_id=motion_bucket_id,
+        num_inference_steps=50,
+        min_cfg_scale=2,
+        max_cfg_scale=2,
+        contrast_enhance_scale=1.2
+    ).frames[0]
     export_to_video(frames, video_path, fps=fps_id)
     return video_path, seed
 examples = [
         "./train.jpg",
                 fps_id = gr.Slider(
                     label="Frames per second",
                     info="The length of your video in seconds will be 25/fps",
+                    value=25,
                     minimum=5,
                     maximum=30
                 )
         examples_per_page=4,
     )
     generate_btn.click(fn=generate, inputs=[image, seed, motion_bucket_id, fps_id], outputs=[video, seed], api_name="video")
 demo.queue().launch()