Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -1,5 +1,150 @@
 import gradio
 
+import basicsr, realesrgan, gfpgan, av, pathlib, diffusers, torch, transformers, builtins, numpy, re
+from animatediff.generate import controlnet_preprocess, img2img_preprocess, wild_card_conversion, region_preprocess, unload_controlnet_models
+from animatediff.settings import get_model_config, get_infer_config
+from animatediff.utils.pipeline import send_to_device
+from animatediff.utils.util import set_tensor_interpolation_method
+from animatediff.pipelines import load_text_embeddings
+from animatediff.pipelines.lora import load_lcm_lora
+import huggingface_hub
+import animatediff
+
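+# Generation settings: 432x768 portrait frames; 1440 frames at the 8 fps mux rate below is a 3-minute clip.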
+width = 432
+height = 768
+length = 1440
+model_config = get_model_config('config/prompts/prompt_travel.json')
+is_sdxl = False
+infer_config = get_infer_config(True, is_sdxl)
+set_tensor_interpolation_method(model_config.tensor_interpolation_slerp)
+device = torch.device('cuda')
+save_dir = pathlib.Path('output')
+controlnet_image_map, controlnet_type_map, controlnet_ref_map, controlnet_no_shrink = controlnet_preprocess(model_config.controlnet_map, width, height, length, save_dir, device, is_sdxl)
+img2img_map = img2img_preprocess(model_config.img2img_map, width, height, length, save_dir)
+
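+# Cache the SD 1.5 base weights locally so individual components can be loaded by subfolder.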
+base_model = pathlib.Path('/tmp/base')
+diffusers.StableDiffusionPipeline.from_pretrained('stable-diffusion-v1-5/stable-diffusion-v1-5').save_pretrained(base_model)
+
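+# Build the components: CLIP tokenizer/text encoder, the ft-MSE VAE, and a UNet inflated with the AnimateLCM motion module.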
+tokenizer = transformers.CLIPTokenizer.from_pretrained(base_model, subfolder='tokenizer')
+text_encoder = transformers.CLIPTextModel.from_pretrained(base_model, subfolder='text_encoder')
+vae = diffusers.AutoencoderKL.from_single_file('https://huggingface.co/chaowenguoback/pal/blob/main/vae-ft-mse-840000-ema-pruned.safetensors')
+huggingface_hub.hf_hub_download(repo_id='wangfuyun/AnimateLCM', filename='AnimateLCM_sd15_t2v.ckpt', local_dir=pathlib.Path.cwd())
+unet = animatediff.models.unet.UNet2DConditionModel.from_pretrained_2d(
+    pretrained_model_path=base_model,
+    motion_module_path=pathlib.Path.cwd().joinpath('AnimateLCM_sd15_t2v.ckpt'),
+    subfolder='unet',
+    unet_additional_kwargs=infer_config.unet_additional_kwargs,
+)
+feature_extractor = transformers.CLIPImageProcessor.from_pretrained(base_model, subfolder='feature_extractor')
+
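+# Replace the UNet and text-encoder weights with the ChilloutMix checkpoint, then drop the loader pipeline.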
+pipeline = diffusers.StableDiffusionPipeline.from_single_file('https://huggingface.co/chaowenguoback/15/blob/main/chilloutMix-Ni.safetensors', config='stable-diffusion-v1-5/stable-diffusion-v1-5', safety_checker=None, use_safetensors=True)
+unet.load_state_dict(pipeline.unet.state_dict(), strict=False)
+text_encoder.load_state_dict(pipeline.text_encoder.state_dict(), strict=False)
+del pipeline
+
+unet.enable_xformers_memory_efficient_attention()
+
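+# Assemble the AnimateDiff AnimationPipeline with an LCM scheduler; no ControlNet models are attached.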
+pipeline = animatediff.pipelines.AnimationPipeline(
+    vae=vae,
+    text_encoder=text_encoder,
+    tokenizer=tokenizer,
+    unet=unet,
+    scheduler=diffusers.LCMScheduler.from_config(infer_config.noise_scheduler_kwargs),
+    feature_extractor=feature_extractor,
+    controlnet_map=None,
+)
+
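+# Apply the AnimateLCM LoRA with a ramped scale schedule, then stack detail, clothing, and character LoRAs.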
+lcm_lora = pathlib.Path.cwd().joinpath('data/models/lcm_lora/sd15')
+lcm_lora.mkdir(parents=True, exist_ok=True)
+huggingface_hub.hf_hub_download(repo_id='wangfuyun/AnimateLCM', filename='AnimateLCM_sd15_t2v_lora.safetensors', local_dir=lcm_lora)
+load_lcm_lora(pipeline, {'start_scale':0.15, 'end_scale':0.75, 'gradient_start':0.2, 'gradient_end':0.75}, is_sdxl=is_sdxl)
+pipeline.lora_map = None
+pipeline.load_lora_weights('chaowenguoback/15', weight_name='add_detail.safetensors', adapter_name='detail')
+pipeline.load_lora_weights('chaowenguoback/15', weight_name='b1r1av5-000007.safetensors', adapter_name='bikini')
+pipeline.load_lora_weights('chaowenguoback/15', weight_name='btcstr.safetensors', adapter_name='c-string')
+pipeline.load_lora_weights('chaowenguoback/15', weight_name='蓝洁瑛.safetensors', adapter_name='character')
+pipeline.set_adapters(['detail', 'bikini', 'c-string', 'character'], [1, 0.4, 0.2, 0.8])
+
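+# Switch to fp16, load text embeddings while the encoder is on the GPU, then move the frozen pipeline to CUDA.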
+pipeline.unet = pipeline.unet.half()
+pipeline.text_encoder = pipeline.text_encoder.half()
+pipeline.text_encoder = pipeline.text_encoder.to(device)
+load_text_embeddings(pipeline)
+pipeline.text_encoder = pipeline.text_encoder.to('cpu')
+pipeline = send_to_device(pipeline, device, freeze=True, force_half=False, compile=False, is_sdxl=is_sdxl)
+wild_card_conversion(model_config)
+
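+# Resolve prompt regions and remap each ControlNet's control_region_list to region indices, dropping unknown regions.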
+is_init_img_exist = img2img_map is not None
+region_condi_list, region_list, ip_adapter_config_map, region2index = region_preprocess(model_config, width, height, length, save_dir, is_init_img_exist, is_sdxl)
+
+if controlnet_type_map:
+    for c in controlnet_type_map:
+        tmp_r = [region2index[r] for r in controlnet_type_map[c]["control_region_list"]]
+        controlnet_type_map[c]["control_region_list"] = [r for r in tmp_r if r != -1]
+
+prompt_map = region_condi_list[0]["prompt_map"]
+prompt_tags = [re.compile(r"[^\w\-, ]").sub("", tag).strip().replace(" ", "-") for tag in prompt_map[list(prompt_map.keys())[0]].split(",")]
+prompt_str = "_".join(prompt_tags[:6])[:50]
+
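+# Denoise with a sliding window: 16-frame contexts, 4-frame overlap, 8 LCM steps per window.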
+output = pipeline(
+    n_prompt='nipple, waistband, back view, monochrome, longbody, lowres, bad anatomy, bad hands, fused fingers, missing fingers, too many fingers, cropped, worst quality, low quality, deformed body, bloated, ugly, unrealistic, extra hands and arms',
+    num_inference_steps=8,
+    guidance_scale=3,
+    unet_batch_size=1,
+    width=width,
+    height=height,
+    video_length=length,
+    return_dict=False,
+    context_frames=16,
+    context_stride=1,
+    context_overlap=16 // 4,
+    context_schedule='composite',
+    clip_skip=2,
+    controlnet_type_map=controlnet_type_map,
+    controlnet_image_map=controlnet_image_map,
+    controlnet_ref_map=controlnet_ref_map,
+    controlnet_no_shrink=controlnet_no_shrink,
+    controlnet_max_samples_on_vram=model_config.controlnet_map["max_samples_on_vram"] if "max_samples_on_vram" in model_config.controlnet_map else 999,
+    controlnet_max_models_on_vram=model_config.controlnet_map["max_models_on_vram"] if "max_models_on_vram" in model_config.controlnet_map else 99,
+    controlnet_is_loop=model_config.controlnet_map["is_loop"] if "is_loop" in model_config.controlnet_map else True,
+    img2img_map=img2img_map,
+    ip_adapter_config_map=ip_adapter_config_map,
+    region_list=region_list,
+    region_condi_list=region_condi_list,
+    interpolation_factor=1,
+    is_single_prompt_mode=model_config.is_single_prompt_mode,
+    gradual_latent_map=model_config.gradual_latent_hires_fix_map,
+    callback=None,
+    callback_steps=None,
+)
+
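+# Free ControlNet VRAM, then reorder the output tensor to (frames, H, W, C) uint8 on the CPU.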
+unload_controlnet_models(pipe=pipeline)
+frames = output.permute(0, 2, 1, 3, 4).squeeze(0)
+frames = frames.mul(255).add_(0.5).clamp_(0, 255).permute(0, 2, 3, 1).to("cpu", torch.uint8).numpy()
+del pipeline
+torch.cuda.empty_cache()
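+# Generate a 180-second techno track with AudioLDM 2 to pair with the 3-minute video.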
+pipeline = diffusers.AudioLDM2Pipeline.from_pretrained('cvssp/audioldm2-music', torch_dtype=torch.float16).to('cuda')
+pipeline.scheduler = diffusers.DPMSolverMultistepScheduler.from_config(pipeline.scheduler.config)
+music = pipeline(prompt='Light rhythm techno', negative_prompt='low quality, average quality', num_inference_steps=20, audio_length_in_s=180).audios[0]
+del pipeline
+torch.cuda.empty_cache()
+
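+# Upscale 4x with Real-ESRGAN plus GFPGAN face restoration and mux H.264 video with AAC audio into video.mp4.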
+model = basicsr.archs.rrdbnet_arch.RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
+upsampler = realesrgan.RealESRGANer(scale=4, model_path='https://huggingface.co/chaowenguoback/pal/resolve/main/RealESRGAN_x4plus.pth', model=model, half=True, device='cuda')
+face_enhancer = gfpgan.GFPGANer(model_path='https://huggingface.co/chaowenguoback/pal/resolve/main/GFPGANv1.4.pth', upscale=4, bg_upsampler=upsampler)
+with av.open('video.mp4', mode='w') as writer:
+    video = writer.add_stream('h264', rate=8)
+    video.width = width * 4
+    video.height = height * 4
+    video.pix_fmt = 'yuv420p'
+    audio = writer.add_stream('aac', rate=16000)
+    for frame in frames: writer.mux(video.encode(av.VideoFrame.from_ndarray(face_enhancer.enhance(frame)[-1])))
+    writer.mux(video.encode())
+    for _ in builtins.range(0, music.shape[0], audio.frame_size):
+        frame = av.AudioFrame.from_ndarray(music[_:_ + audio.frame_size][None], format='fltp', layout='mono')
+        frame.sample_rate = audio.sample_rate
+        frame.pts = _
+        writer.mux(audio.encode(frame))
+    writer.mux(audio.encode())
+
 def greet(name, intensity):
     return "Hello, " + name + "!" * int(intensity)
 