Fixed bug with CUDA in detokenizer

2022-06-28 22:02:35 -04:00
parent 764b0bc685
commit 1fbb209623
3 changed files with 5 additions and 5 deletions
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@ This is a minimal implementation of [DALL·E Mini](https://github.com/borisdayma

 ### Setup

-Run `sh setup.sh` to install dependencies and download pretrained models.  In the bash script, Git LFS is used to download the VQGan detokenizer from Hugging Face and the Weight & Biases python package is used to download the DALL·E Mini and DALL·E Mega transformer models. These models can also be downloaded manually: 
+Run `sh setup.sh` to install dependencies and download pretrained models.  The models can also be downloaded manually: 
 [VQGan](https://huggingface.co/dalle-mini/vqgan_imagenet_f16_16384), 
 [DALL·E Mini](https://wandb.ai/dalle-mini/dalle-mini/artifacts/DalleBart_model/mini-1/v0/files), 
 [DALL·E Mega](https://wandb.ai/dalle-mini/dalle-mini/artifacts/DalleBart_model/mega-1-fp16/v14/files)
--- a/min_dalle/generate_image.py
+++ b/min_dalle/generate_image.py
@@ -63,7 +63,7 @@ def generate_image_from_text(
            image_token_count = image_token_count
        )
        if image_token_count == config['image_length']:
-            image = detokenize_torch(image_tokens)
+            image = detokenize_torch(image_tokens, is_torch=True)
            return Image.fromarray(image)
        else:
            print(list(image_tokens.to('cpu').detach().numpy()))
@@ -74,5 +74,5 @@ def generate_image_from_text(
            config = config,
            params = params_dalle_bart,
        )
-        image = detokenize_torch(torch.tensor(image_tokens))
+        image = detokenize_torch(torch.tensor(image_tokens), is_torch=False)
        return Image.fromarray(image)
--- a/min_dalle/min_dalle_torch.py
+++ b/min_dalle/min_dalle_torch.py
@@ -104,13 +104,13 @@ def generate_image_tokens_torch(
    return image_tokens


-def detokenize_torch(image_tokens: LongTensor) -> numpy.ndarray:
+def detokenize_torch(image_tokens: LongTensor, is_torch: bool) -> numpy.ndarray:
    print("detokenizing image")
    model_path = './pretrained/vqgan'
    params = load_vqgan_torch_params(model_path)
    detokenizer = VQGanDetokenizer()
    detokenizer.load_state_dict(params)
-    # if torch.cuda.is_available(): detokenizer = detokenizer.cuda()
+    if torch.cuda.is_available() and is_torch: detokenizer = detokenizer.cuda()
    image = detokenizer.forward(image_tokens).to(torch.uint8)
    del detokenizer, params
    return image.to('cpu').detach().numpy()