Initial commit of working version
- README.md +3 -3
- app.py +56 -0
- gan_utils.py +31 -0
- layers.py +273 -0
- models.py +246 -0
- requirements.txt +4 -0
- text_utils.py +31 -0
- utils.py +77 -0
README.md
CHANGED
@@ -1,10 +1,10 @@
 ---
 title: Illustrated Lyrics Generator
-emoji:
+emoji: 🎶
 colorFrom: indigo
-colorTo:
+colorTo: yellow
 sdk: gradio
-sdk_version: 3.0.
+sdk_version: 3.0.24
 app_file: app.py
 pinned: false
 ---
app.py
ADDED
@@ -0,0 +1,56 @@
+import gradio as gr
+from transformers import pipeline
+
+from text_utils import wrap_text, compute_text_position
+from gan_utils import load_img_generator, generate_img
+from PIL import ImageFont, ImageDraw
+import torch
+
+# device = 'cuda' if torch.cuda.is_available() else 'cpu'
+device = "cpu"
+
+text_generator = pipeline('text-generation', model='huggingtweets/bestmusiclyric')
+
+
+def generate_captioned_img(lyrics_prompt, gan_model):
+    gan_image = generate_img(device, gan_model)
+
+    generated_text = text_generator(lyrics_prompt)[0]["generated_text"]
+    wrapped_text = wrap_text(generated_text)
+
+    text_pos = compute_text_position(wrapped_text)
+
+    # Source: https://stackoverflow.com/a/16377244
+    draw = ImageDraw.Draw(gan_image)
+    font = ImageFont.truetype("DejaVuSans.ttf", 64)
+    draw.text((10, text_pos), text=wrapped_text, fill=(255, 255, 255), font=font, stroke_fill=(0, 0, 0),
+              stroke_width=5)
+
+    return gan_image
+
+
+iface = gr.Interface(fn=generate_captioned_img, inputs=[gr.Textbox(value="Running with the wolves", label="Lyrics prompt", lines=1),
+                                                        gr.Radio(value="aurora",
+                                                                 choices=["painting", "fauvism-still-life", "aurora",
+                                                                          "universe", "moongate"],
+                                                                 label="FastGAN model")
+                                                        ],
+                     outputs="image",
+                     allow_flagging="never",
+                     title="Illustrated lyrics generator",
+                     description="Combines song lyrics generation via the [Best Music Lyric Bot]"
+                                 "(https://huggingface.co/huggingtweets/bestmusiclyric) with an artwork randomly "
+                                 "generated by a [FastGAN model](https://huggingface.co/spaces/huggan/FastGan).\n\n"
+                                 "Text and lyrics are generated independently. "
+                                 "If you can implement this idea with images conditioned on the lyrics,"
+                                 " I'd be very interested in seeing that!🤗\n\n"
+                                 "At the bottom of the page, you can click some example inputs to get you started.",
+                     examples=[["Hey now", "fauvism-still-life"], ["It's gonna take a lot", "universe"],
+                               ["Running with the wolves", "aurora"], ["His palms are sweaty", "painting"],
+                               ["I just met you", "moongate"]]
+                     )
+iface.launch()
+
+
+#examples=[["Hey now", "painting"], ["It's gonna take a lot", "universe"], ["So close", "aurora"], ["I just met you", "moongate"],
+#          ["His palms are sweaty", "aurora"]])
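
A sketch, not part of this commit: the same caption pipeline run directly, without the Gradio UI. It assumes this repo's modules are importable, the pipeline weights can be downloaded, and DejaVuSans.ttf is available (it is on the Spaces base image); the output path is arbitrary.

# Sketch: call the app's pipeline directly, skipping gr.Interface.
from PIL import ImageDraw, ImageFont
from transformers import pipeline

from gan_utils import generate_img
from text_utils import wrap_text, compute_text_position

text_generator = pipeline("text-generation", model="huggingtweets/bestmusiclyric")

img = generate_img("cpu", "aurora")  # 1024x1024 PIL image from the FastGAN
text = wrap_text(text_generator("Hey now")[0]["generated_text"])
draw = ImageDraw.Draw(img)
font = ImageFont.truetype("DejaVuSans.ttf", 64)
draw.text((10, compute_text_position(text)), text=text, fill=(255, 255, 255),
          font=font, stroke_fill=(0, 0, 0), stroke_width=5)
img.save("captioned.png")  # arbitrary output path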
gan_utils.py
ADDED
@@ -0,0 +1,31 @@
+# Code adapted from the following sources:
+# https://huggingface.co/huggan/fastgan-few-shot-fauvism-still-life
+# https://huggingface.co/spaces/huggan/FastGan/
+
+import torch
+from PIL import Image
+
+from models import Generator
+
+
+def load_img_generator(model_name_or_path):
+    generator = Generator(in_channels=256, out_channels=3)
+    generator = generator.from_pretrained(model_name_or_path, in_channels=256, out_channels=3)
+    _ = generator.eval()
+
+    return generator
+
+
+def _denormalize(input: torch.Tensor) -> torch.Tensor:
+    return (input * 127.5) + 127.5
+
+
+def generate_img(device, gan_model):
+    img_generator = load_img_generator("huggan/fastgan-few-shot-" + gan_model)
+    noise = torch.zeros(1, 256, 1, 1, device=device).normal_(0.0, 1.0)
+    with torch.no_grad():
+        gan_images, _ = img_generator(noise)
+    gan_image = _denormalize(gan_images.detach()).cpu().squeeze()
+    gan_image = gan_image.permute(1, 2, 0).to("cpu", torch.uint8).numpy()
+    gan_image = Image.fromarray(gan_image)
+    return gan_image
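
The conversion in generate_img assumes the generator's Tanh head outputs values in [-1, 1]: _denormalize rescales that range to [0, 255] before the uint8 cast, and permute reorders CHW to the HWC layout PIL expects. A quick sketch of the arithmetic (illustrative values only, not part of the commit):

import torch

x = torch.tensor([-1.0, 0.0, 1.0])   # extremes and midpoint of the Tanh range
print((x * 127.5) + 127.5)           # tensor([  0.0000, 127.5000, 255.0000])

chw = torch.rand(3, 1024, 1024)      # generator output layout: channels first
print(chw.permute(1, 2, 0).shape)    # torch.Size([1024, 1024, 3]) for PIL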
layers.py
ADDED
@@ -0,0 +1,273 @@
+# Source: https://huggingface.co/huggan/fastgan-few-shot-fauvism-still-life
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.nn.modules.batchnorm import BatchNorm2d
+from torch.nn.utils import spectral_norm
+
+
+class SpectralConv2d(nn.Module):
+
+    def __init__(self, *args, **kwargs):
+        super().__init__()
+        self._conv = spectral_norm(
+            nn.Conv2d(*args, **kwargs)
+        )
+
+    def forward(self, input: torch.Tensor) -> torch.Tensor:
+        return self._conv(input)
+
+
+class SpectralConvTranspose2d(nn.Module):
+
+    def __init__(self, *args, **kwargs):
+        super().__init__()
+        self._conv = spectral_norm(
+            nn.ConvTranspose2d(*args, **kwargs)
+        )
+
+    def forward(self, input: torch.Tensor) -> torch.Tensor:
+        return self._conv(input)
+
+
+class Noise(nn.Module):
+
+    def __init__(self):
+        super().__init__()
+        self._weight = nn.Parameter(
+            torch.zeros(1),
+            requires_grad=True,
+        )
+
+    def forward(self, input: torch.Tensor) -> torch.Tensor:
+        batch_size, _, height, width = input.shape
+        noise = torch.randn(batch_size, 1, height, width, device=input.device)
+        return self._weight * noise + input
+
+
+class InitLayer(nn.Module):
+
+    def __init__(self, in_channels: int,
+                       out_channels: int):
+        super().__init__()
+
+        self._layers = nn.Sequential(
+            SpectralConvTranspose2d(
+                in_channels=in_channels,
+                out_channels=out_channels * 2,
+                kernel_size=4,
+                stride=1,
+                padding=0,
+                bias=False,
+            ),
+            nn.BatchNorm2d(num_features=out_channels * 2),
+            nn.GLU(dim=1),
+        )
+
+    def forward(self, input: torch.Tensor) -> torch.Tensor:
+        return self._layers(input)
+
+
+class SLEBlock(nn.Module):
+
+    def __init__(self, in_channels: int,
+                       out_channels: int):
+        super().__init__()
+
+        self._layers = nn.Sequential(
+            nn.AdaptiveAvgPool2d(output_size=4),
+            SpectralConv2d(
+                in_channels=in_channels,
+                out_channels=out_channels,
+                kernel_size=4,
+                stride=1,
+                padding=0,
+                bias=False,
+            ),
+            nn.SiLU(),
+            SpectralConv2d(
+                in_channels=out_channels,
+                out_channels=out_channels,
+                kernel_size=1,
+                stride=1,
+                padding=0,
+                bias=False,
+            ),
+            nn.Sigmoid(),
+        )
+
+    def forward(self, low_dim: torch.Tensor,
+                      high_dim: torch.Tensor) -> torch.Tensor:
+        return high_dim * self._layers(low_dim)
+
+
+class UpsampleBlockT1(nn.Module):
+
+    def __init__(self, in_channels: int,
+                       out_channels: int):
+        super().__init__()
+
+        self._layers = nn.Sequential(
+            nn.Upsample(scale_factor=2, mode='nearest'),
+            SpectralConv2d(
+                in_channels=in_channels,
+                out_channels=out_channels * 2,
+                kernel_size=3,
+                stride=1,
+                padding='same',
+                bias=False,
+            ),
+            nn.BatchNorm2d(num_features=out_channels * 2),
+            nn.GLU(dim=1),
+        )
+
+    def forward(self, input: torch.Tensor) -> torch.Tensor:
+        return self._layers(input)
+
+
+class UpsampleBlockT2(nn.Module):
+
+    def __init__(self, in_channels: int,
+                       out_channels: int):
+        super().__init__()
+
+        self._layers = nn.Sequential(
+            nn.Upsample(scale_factor=2, mode='nearest'),
+            SpectralConv2d(
+                in_channels=in_channels,
+                out_channels=out_channels * 2,
+                kernel_size=3,
+                stride=1,
+                padding='same',
+                bias=False,
+            ),
+            Noise(),
+            BatchNorm2d(num_features=out_channels * 2),
+            nn.GLU(dim=1),
+            SpectralConv2d(
+                in_channels=out_channels,
+                out_channels=out_channels * 2,
+                kernel_size=3,
+                stride=1,
+                padding='same',
+                bias=False,
+            ),
+            Noise(),
+            nn.BatchNorm2d(num_features=out_channels * 2),
+            nn.GLU(dim=1),
+        )
+
+    def forward(self, input: torch.Tensor) -> torch.Tensor:
+        return self._layers(input)
+
+
+class DownsampleBlockT1(nn.Module):
+
+    def __init__(self, in_channels: int,
+                       out_channels: int):
+        super().__init__()
+
+        self._layers = nn.Sequential(
+            SpectralConv2d(
+                in_channels=in_channels,
+                out_channels=out_channels,
+                kernel_size=4,
+                stride=2,
+                padding=1,
+                bias=False,
+            ),
+            nn.BatchNorm2d(num_features=out_channels),
+            nn.LeakyReLU(negative_slope=0.2),
+        )
+
+    def forward(self, input: torch.Tensor) -> torch.Tensor:
+        return self._layers(input)
+
+
+class DownsampleBlockT2(nn.Module):
+
+    def __init__(self, in_channels: int,
+                       out_channels: int):
+        super().__init__()
+
+        self._layers_1 = nn.Sequential(
+            SpectralConv2d(
+                in_channels=in_channels,
+                out_channels=out_channels,
+                kernel_size=4,
+                stride=2,
+                padding=1,
+                bias=False,
+            ),
+            nn.BatchNorm2d(num_features=out_channels),
+            nn.LeakyReLU(negative_slope=0.2),
+            SpectralConv2d(
+                in_channels=out_channels,
+                out_channels=out_channels,
+                kernel_size=3,
+                stride=1,
+                padding='same',
+                bias=False,
+            ),
+            nn.BatchNorm2d(num_features=out_channels),
+            nn.LeakyReLU(negative_slope=0.2),
+        )
+
+        self._layers_2 = nn.Sequential(
+            nn.AvgPool2d(
+                kernel_size=2,
+                stride=2,
+            ),
+            SpectralConv2d(
+                in_channels=in_channels,
+                out_channels=out_channels,
+                kernel_size=1,
+                stride=1,
+                padding=0,
+                bias=False,
+            ),
+            nn.BatchNorm2d(num_features=out_channels),
+            nn.LeakyReLU(negative_slope=0.2),
+        )
+
+    def forward(self, input: torch.Tensor) -> torch.Tensor:
+        t1 = self._layers_1(input)
+        t2 = self._layers_2(input)
+        return (t1 + t2) / 2
+
+
+class Decoder(nn.Module):
+
+    def __init__(self, in_channels: int,
+                       out_channels: int):
+        super().__init__()
+
+        self._channels = {
+            16: 128,
+            32: 64,
+            64: 64,
+            128: 32,
+            256: 16,
+            512: 8,
+            1024: 4,
+        }
+
+        self._layers = nn.Sequential(
+            nn.AdaptiveAvgPool2d(output_size=8),
+            UpsampleBlockT1(in_channels=in_channels, out_channels=self._channels[16]),
+            UpsampleBlockT1(in_channels=self._channels[16], out_channels=self._channels[32]),
+            UpsampleBlockT1(in_channels=self._channels[32], out_channels=self._channels[64]),
+            UpsampleBlockT1(in_channels=self._channels[64], out_channels=self._channels[128]),
+            SpectralConv2d(
+                in_channels=self._channels[128],
+                out_channels=out_channels,
+                kernel_size=3,
+                stride=1,
+                padding='same',
+                bias=False,
+            ),
+            nn.Tanh(),
+        )
+
+    def forward(self, input: torch.Tensor) -> torch.Tensor:
+        return self._layers(input)
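
Most blocks above emit out_channels * 2 feature maps from their convolutions because the following nn.GLU(dim=1) halves the channel dimension again. A shape sanity check, as a sketch, not part of the commit:

import torch
from layers import UpsampleBlockT1, SLEBlock

x = torch.rand(1, 64, 16, 16)
up = UpsampleBlockT1(in_channels=64, out_channels=32)
print(up(x).shape)  # torch.Size([1, 32, 32, 32]): 2x spatial, GLU-halved channels

# SLEBlock squeezes the low-resolution map to a per-channel gate in (0, 1)
# and multiplies it onto the high-resolution map.
low, high = torch.rand(1, 64, 8, 8), torch.rand(1, 32, 128, 128)
sle = SLEBlock(in_channels=64, out_channels=32)
print(sle(low, high).shape)  # torch.Size([1, 32, 128, 128])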
models.py
ADDED
@@ -0,0 +1,246 @@
+# Source: https://huggingface.co/huggan/fastgan-few-shot-fauvism-still-life
+import torch
+import torch.nn as nn
+
+from typing import Any, Tuple, Union
+
+from utils import (
+    ImageType,
+    crop_image_part,
+)
+
+from layers import (
+    SpectralConv2d,
+    InitLayer,
+    SLEBlock,
+    UpsampleBlockT1,
+    UpsampleBlockT2,
+    DownsampleBlockT1,
+    DownsampleBlockT2,
+    Decoder,
+)
+
+from huggan.pytorch.huggan_mixin import HugGANModelHubMixin
+
+
+class Generator(nn.Module, HugGANModelHubMixin):
+
+    def __init__(self, in_channels: int,
+                       out_channels: int):
+        super().__init__()
+
+        self._channels = {
+            4: 1024,
+            8: 512,
+            16: 256,
+            32: 128,
+            64: 128,
+            128: 64,
+            256: 32,
+            512: 16,
+            1024: 8,
+        }
+
+        self._init = InitLayer(
+            in_channels=in_channels,
+            out_channels=self._channels[4],
+        )
+
+        self._upsample_8 = UpsampleBlockT2(in_channels=self._channels[4], out_channels=self._channels[8])
+        self._upsample_16 = UpsampleBlockT1(in_channels=self._channels[8], out_channels=self._channels[16])
+        self._upsample_32 = UpsampleBlockT2(in_channels=self._channels[16], out_channels=self._channels[32])
+        self._upsample_64 = UpsampleBlockT1(in_channels=self._channels[32], out_channels=self._channels[64])
+        self._upsample_128 = UpsampleBlockT2(in_channels=self._channels[64], out_channels=self._channels[128])
+        self._upsample_256 = UpsampleBlockT1(in_channels=self._channels[128], out_channels=self._channels[256])
+        self._upsample_512 = UpsampleBlockT2(in_channels=self._channels[256], out_channels=self._channels[512])
+        self._upsample_1024 = UpsampleBlockT1(in_channels=self._channels[512], out_channels=self._channels[1024])
+
+        self._sle_64 = SLEBlock(in_channels=self._channels[4], out_channels=self._channels[64])
+        self._sle_128 = SLEBlock(in_channels=self._channels[8], out_channels=self._channels[128])
+        self._sle_256 = SLEBlock(in_channels=self._channels[16], out_channels=self._channels[256])
+        self._sle_512 = SLEBlock(in_channels=self._channels[32], out_channels=self._channels[512])
+
+        self._out_128 = nn.Sequential(
+            SpectralConv2d(
+                in_channels=self._channels[128],
+                out_channels=out_channels,
+                kernel_size=1,
+                stride=1,
+                padding='same',
+                bias=False,
+            ),
+            nn.Tanh(),
+        )
+
+        self._out_1024 = nn.Sequential(
+            SpectralConv2d(
+                in_channels=self._channels[1024],
+                out_channels=out_channels,
+                kernel_size=3,
+                stride=1,
+                padding='same',
+                bias=False,
+            ),
+            nn.Tanh(),
+        )
+
+    def forward(self, input: torch.Tensor) -> \
+            Tuple[torch.Tensor, torch.Tensor]:
+        size_4 = self._init(input)
+        size_8 = self._upsample_8(size_4)
+        size_16 = self._upsample_16(size_8)
+        size_32 = self._upsample_32(size_16)
+
+        size_64 = self._sle_64(size_4, self._upsample_64(size_32))
+        size_128 = self._sle_128(size_8, self._upsample_128(size_64))
+        size_256 = self._sle_256(size_16, self._upsample_256(size_128))
+        size_512 = self._sle_512(size_32, self._upsample_512(size_256))
+
+        size_1024 = self._upsample_1024(size_512)
+
+        out_128 = self._out_128(size_128)
+        out_1024 = self._out_1024(size_1024)
+        return out_1024, out_128
+
+
+class Discriminator(nn.Module, HugGANModelHubMixin):
+
+    def __init__(self, in_channels: int):
+        super().__init__()
+
+        self._channels = {
+            4: 1024,
+            8: 512,
+            16: 256,
+            32: 128,
+            64: 128,
+            128: 64,
+            256: 32,
+            512: 16,
+            1024: 8,
+        }
+
+        self._init = nn.Sequential(
+            SpectralConv2d(
+                in_channels=in_channels,
+                out_channels=self._channels[1024],
+                kernel_size=4,
+                stride=2,
+                padding=1,
+                bias=False,
+            ),
+            nn.LeakyReLU(negative_slope=0.2),
+            SpectralConv2d(
+                in_channels=self._channels[1024],
+                out_channels=self._channels[512],
+                kernel_size=4,
+                stride=2,
+                padding=1,
+                bias=False,
+            ),
+            nn.BatchNorm2d(num_features=self._channels[512]),
+            nn.LeakyReLU(negative_slope=0.2),
+        )
+
+        self._downsample_256 = DownsampleBlockT2(in_channels=self._channels[512], out_channels=self._channels[256])
+        self._downsample_128 = DownsampleBlockT2(in_channels=self._channels[256], out_channels=self._channels[128])
+        self._downsample_64 = DownsampleBlockT2(in_channels=self._channels[128], out_channels=self._channels[64])
+        self._downsample_32 = DownsampleBlockT2(in_channels=self._channels[64], out_channels=self._channels[32])
+        self._downsample_16 = DownsampleBlockT2(in_channels=self._channels[32], out_channels=self._channels[16])
+
+        self._sle_64 = SLEBlock(in_channels=self._channels[512], out_channels=self._channels[64])
+        self._sle_32 = SLEBlock(in_channels=self._channels[256], out_channels=self._channels[32])
+        self._sle_16 = SLEBlock(in_channels=self._channels[128], out_channels=self._channels[16])
+
+        self._small_track = nn.Sequential(
+            SpectralConv2d(
+                in_channels=in_channels,
+                out_channels=self._channels[256],
+                kernel_size=4,
+                stride=2,
+                padding=1,
+                bias=False,
+            ),
+            nn.LeakyReLU(negative_slope=0.2),
+            DownsampleBlockT1(in_channels=self._channels[256], out_channels=self._channels[128]),
+            DownsampleBlockT1(in_channels=self._channels[128], out_channels=self._channels[64]),
+            DownsampleBlockT1(in_channels=self._channels[64], out_channels=self._channels[32]),
+        )
+
+        self._features_large = nn.Sequential(
+            SpectralConv2d(
+                in_channels=self._channels[16],
+                out_channels=self._channels[8],
+                kernel_size=1,
+                stride=1,
+                padding=0,
+                bias=False,
+            ),
+            nn.BatchNorm2d(num_features=self._channels[8]),
+            nn.LeakyReLU(negative_slope=0.2),
+            SpectralConv2d(
+                in_channels=self._channels[8],
+                out_channels=1,
+                kernel_size=4,
+                stride=1,
+                padding=0,
+                bias=False,
+            )
+        )
+
+        self._features_small = nn.Sequential(
+            SpectralConv2d(
+                in_channels=self._channels[32],
+                out_channels=1,
+                kernel_size=4,
+                stride=1,
+                padding=0,
+                bias=False,
+            ),
+        )
+
+        self._decoder_large = Decoder(in_channels=self._channels[16], out_channels=3)
+        self._decoder_small = Decoder(in_channels=self._channels[32], out_channels=3)
+        self._decoder_piece = Decoder(in_channels=self._channels[32], out_channels=3)
+
+    def forward(self, images_1024: torch.Tensor,
+                      images_128: torch.Tensor,
+                      image_type: ImageType) -> \
+            Union[
+                torch.Tensor,
+                Tuple[torch.Tensor, Tuple[Any, Any, Any]]
+            ]:
+        # large track
+
+        down_512 = self._init(images_1024)
+        down_256 = self._downsample_256(down_512)
+        down_128 = self._downsample_128(down_256)
+
+        down_64 = self._downsample_64(down_128)
+        down_64 = self._sle_64(down_512, down_64)
+
+        down_32 = self._downsample_32(down_64)
+        down_32 = self._sle_32(down_256, down_32)
+
+        down_16 = self._downsample_16(down_32)
+        down_16 = self._sle_16(down_128, down_16)
+
+        # small track
+
+        down_small = self._small_track(images_128)
+
+        # features
+
+        features_large = self._features_large(down_16).view(-1)
+        features_small = self._features_small(down_small).view(-1)
+        features = torch.cat([features_large, features_small], dim=0)
+
+        # decoder
+
+        if image_type != ImageType.FAKE:
+            dec_large = self._decoder_large(down_16)
+            dec_small = self._decoder_small(down_small)
+            dec_piece = self._decoder_piece(crop_image_part(down_32, image_type))
+            return features, (dec_large, dec_small, dec_piece)
+
+        return features
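
The generator maps a (N, in_channels, 1, 1) latent to two resolutions: the 1024-px image the app serves and a 128-px companion used during discriminator training. A randomly initialized shape check, as a sketch, not part of the commit:

import torch
from models import Generator

g = Generator(in_channels=256, out_channels=3).eval()
noise = torch.randn(1, 256, 1, 1)
with torch.no_grad():
    out_1024, out_128 = g(noise)
print(out_1024.shape)  # torch.Size([1, 3, 1024, 1024])
print(out_128.shape)   # torch.Size([1, 3, 128, 128])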
requirements.txt
ADDED
@@ -0,0 +1,4 @@
+transformers
+torch
+git+https://github.com/huggingface/community-events@main
+gradio
text_utils.py
ADDED
@@ -0,0 +1,31 @@
+def wrap_text(generated_text):
+    wrapping_text = ""
+    current_line_length = 0
+    print(generated_text)
+    if "-" in generated_text:
+        quote, author = generated_text.split("-", 1)
+    elif "—" in generated_text:
+        quote, author = generated_text.split("—", 1)
+    else:
+        quote = generated_text
+        author = None
+    for word in quote.split(" "):
+        if current_line_length >= 20:
+            wrapping_text += f"\n{word} "
+            current_line_length = len(word)
+        else:
+            wrapping_text += f"{word} "
+            current_line_length += len(word)
+    if author is not None:
+        wrapping_text += f"\n- {author}"
+    return wrapping_text
+
+
+def compute_text_position(wrapped_text):
+    img_height = 1024
+    line_height_in_px = 74  # roughly estimated
+    margin_bottom = 100  # align text close to the bottom, leaving this many pixels free
+    n_lines = wrapped_text.count("\n") + 1
+    text_height = n_lines * line_height_in_px
+    text_pos = img_height - margin_bottom - text_height
+    return text_pos
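
Illustrative behavior, as a sketch with a made-up lyric (not part of the commit): wrap_text starts a new line once the running length reaches 20 characters and moves a dash-separated author onto its own line; compute_text_position then anchors the block 100 px above the bottom edge of the 1024-px image.

from text_utils import wrap_text, compute_text_position

wrapped = wrap_text("Hey now, you're an all star, get your game on-Smash Mouth")
print(wrapped)
# Hey now, you're an all star,
# get your game on
# - Smash Mouth
print(compute_text_position(wrapped))  # 1024 - 100 - 3 * 74 = 702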
utils.py
ADDED
@@ -0,0 +1,77 @@
+# Source: https://huggingface.co/huggan/fastgan-few-shot-fauvism-still-life
+import torch
+import torch.nn as nn
+from enum import Enum
+
+import base64
+import json
+from io import BytesIO
+from PIL import Image
+import requests
+import re
+
+class ImageType(Enum):
+    REAL_UP_L = 0
+    REAL_UP_R = 1
+    REAL_DOWN_R = 2
+    REAL_DOWN_L = 3
+    FAKE = 4
+
+
+def crop_image_part(image: torch.Tensor,
+                    part: ImageType) -> torch.Tensor:
+    size = image.shape[2] // 2
+
+    if part == ImageType.REAL_UP_L:
+        return image[:, :, :size, :size]
+
+    elif part == ImageType.REAL_UP_R:
+        return image[:, :, :size, size:]
+
+    elif part == ImageType.REAL_DOWN_L:
+        return image[:, :, size:, :size]
+
+    elif part == ImageType.REAL_DOWN_R:
+        return image[:, :, size:, size:]
+
+    else:
+        raise ValueError('invalid part')
+
+
+def init_weights(module: nn.Module):
+    if isinstance(module, nn.Conv2d):
+        torch.nn.init.normal_(module.weight, 0.0, 0.02)
+
+    if isinstance(module, nn.BatchNorm2d):
+        torch.nn.init.normal_(module.weight, 1.0, 0.02)
+        module.bias.data.fill_(0)
+
+def load_image_from_local(image_path, image_resize=None):
+    image = Image.open(image_path)
+
+    if isinstance(image_resize, tuple):
+        image = image.resize(image_resize)
+    return image
+
+def load_image_from_url(image_url, rgba_mode=False, image_resize=None, default_image=None):
+    try:
+        image = Image.open(requests.get(image_url, stream=True).raw)
+
+        if rgba_mode:
+            image = image.convert("RGBA")
+
+        if isinstance(image_resize, tuple):
+            image = image.resize(image_resize)
+
+    except Exception as e:
+        image = None
+        if default_image:
+            image = load_image_from_local(default_image, image_resize=image_resize)
+
+    return image
+
+def image_to_base64(image_array):
+    buffered = BytesIO()
+    image_array.save(buffered, format="PNG")
+    image_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
+    return f"data:image/png;base64, {image_b64}"
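
crop_image_part selects one spatial quadrant of a feature map, which the discriminator's decoder path uses to reconstruct a random patch of real images. A minimal sketch, not part of the commit:

import torch
from utils import ImageType, crop_image_part

feat = torch.arange(16.0).reshape(1, 1, 4, 4)  # toy 4x4 "feature map"
print(crop_image_part(feat, ImageType.REAL_UP_L).shape)  # torch.Size([1, 1, 2, 2])
print(crop_image_part(feat, ImageType.REAL_DOWN_R)[0, 0])
# tensor([[10., 11.],
#         [14., 15.]])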