From eb9f4c6b3b073d4f9534579acf84f7489b78467d Mon Sep 17 00:00:00 2001 From: Chenxi Date: Wed, 29 Jun 2022 19:50:10 +0100 Subject: [PATCH] replicate demo --- README.md | 3 ++- cog.yaml | 14 ++++++++++++++ predict.py | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 cog.yaml create mode 100644 predict.py diff --git a/README.md b/README.md index a268946..3f84228 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # min(DALL·E) -[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/kuprel/min-dalle/blob/main/min_dalle.ipynb) +[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/kuprel/min-dalle/blob/main/min_dalle.ipynb) \ +Try the Replicate web demo here [![Replicate](https://replicate.com/kuprel/min-dalle/badge)](https://replicate.com/kuprel/min-dalle) This is a minimal implementation of [DALL·E Mini](https://github.com/borisdayma/dalle-mini). It has been stripped to the bare essentials necessary for doing inference, and converted to PyTorch. The only third party dependencies are `numpy` and `torch` for the torch model and `flax` for the flax model. 
diff --git a/cog.yaml b/cog.yaml new file mode 100644 index 0000000..269e8bf --- /dev/null +++ b/cog.yaml @@ -0,0 +1,14 @@ +build: + cuda: "11.0" + gpu: true + python_version: "3.8" + system_packages: + - "libgl1-mesa-glx" + - "libglib2.0-0" + python_packages: + - "ipython==7.21.0" + - "torch==1.10.1" + - "flax==0.4.2" + - "wandb==0.12.16" + +predict: "predict.py:Predictor" diff --git a/predict.py b/predict.py new file mode 100644 index 0000000..f8c1e3d --- /dev/null +++ b/predict.py @@ -0,0 +1,53 @@ +import tempfile +from PIL import Image +from cog import BasePredictor, Path, Input + +from min_dalle.generate_image import load_dalle_bart_metadata, tokenize_text +from min_dalle.load_params import load_dalle_bart_flax_params +from min_dalle.min_dalle_torch import generate_image_tokens_torch, detokenize_torch + + +class Predictor(BasePredictor): + def setup(self): + self.model_path = { + "mini": "pretrained/dalle_bart_mini", + "mega": "pretrained/dalle_bart_mega", + } + self.configs = { + k: load_dalle_bart_metadata(self.model_path[k]) + for k in self.model_path.keys() + } + + def predict( + self, + text: str = Input( + description="Text for generating images.", + ), + model: str = Input( + choices=["mini", "mega"], + description="Choose mini or mega model.", + ), + seed: int = Input( + description="Specify the seed.", + ), + ) -> Path: + + config, vocab, merges = self.configs[model] + text_tokens = tokenize_text(text, config, vocab, merges) + params_dalle_bart = load_dalle_bart_flax_params(self.model_path[model]) + + image_token_count = config["image_length"] + image_tokens = generate_image_tokens_torch( + text_tokens=text_tokens, + seed=seed, + config=config, + params=params_dalle_bart, + image_token_count=image_token_count, + ) + + image = detokenize_torch(image_tokens, is_torch=True) + image = Image.fromarray(image) + out_path = Path(tempfile.mkdtemp()) / "output.png" + image.save(str(out_path)) + + return out_path