diff --git a/README.md b/README.md index c01bf17..e500075 100644 --- a/README.md +++ b/README.md @@ -34,46 +34,46 @@ model = MinDalle(is_mega=True, models_root='./pretrained') The required models will be downloaded to `models_root` if they are not already there. Once everything has finished initializing, call `generate_image` with some text and a seed as many times as you want. ```python -text = 'Dali painting of WallE' +text = 'Dali painting of WALL·E' image = model.generate_image(text, seed=0, grid_size=4) display(image) ``` -drawing +min-dalle ```python text = 'Rusty Iron Man suit found abandoned in the woods being reclaimed by nature' image = model.generate_image(text, seed=0, grid_size=3) display(image) ``` -drawing +min-dalle ```python text = 'court sketch of godzilla on trial' image = model.generate_image(text, seed=6, grid_size=3) display(image) ``` -drawing +min-dalle ```python text = 'a funeral at Whole Foods' image = model.generate_image(text, seed=10, grid_size=3) display(image) ``` -drawing +min-dalle ```python text = 'Jesus turning water into wine on Americas Got Talent' image = model.generate_image(text, seed=2, grid_size=3) display(image) ``` -drawing +min-dalle ```python text = 'cctv footage of Yoda robbing a liquor store' image = model.generate_image(text, seed=0, grid_size=3) display(image) ``` -drawing +min-dalle ### Command Line @@ -83,9 +83,9 @@ Use `image_from_text.py` to generate images from the command line. ```bash $ python image_from_text.py --text='artificial intelligence' --seed=7 ``` -drawing +min-dalle ```bash $ python image_from_text.py --text='trail cam footage of gollum eating watermelon' --mega --seed=1 --grid-size=3 ``` -drawing +min-dalle diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..1871749 --- /dev/null +++ b/README.rst @@ -0,0 +1,95 @@ +min(DALL·E) +=========== + +|Open In Colab|   |Replicate|   |Join us on Discord| + +This is a fast, minimal implementation of Boris Dayma’s `DALL·E +Mini `__. 
It has been stripped +down for inference and converted to PyTorch. The only third party +dependencies are numpy, requests, pillow and torch. + +To generate a 3x3 grid of DALL·E Mega images it takes - **35 seconds** +with a P100 in Colab - **15 seconds** with an A100 on Replicate - +**TBD** with an H100 (@NVIDIA?) + +The flax model and code for converting it to torch can be found +`here `__. + +Install +------- + +.. code:: bash + + $ pip install min-dalle + +Usage +----- + +Load the model parameters once and reuse the model to generate multiple +images. + +.. code:: python + + from min_dalle import MinDalle + + model = MinDalle(is_mega=True, models_root='./pretrained') + +The required models will be downloaded to ``models_root`` if they are +not already there. Once everything has finished initializing, call +``generate_image`` with some text and a seed as many times as you want. + +.. code:: python + + text = 'Dali painting of WALL·E' + image = model.generate_image(text, seed=0, grid_size=4) + display(image) + +.. code:: python + + text = 'Rusty Iron Man suit found abandoned in the woods being reclaimed by nature' + image = model.generate_image(text, seed=0, grid_size=3) + display(image) + +.. code:: python + + text = 'court sketch of godzilla on trial' + image = model.generate_image(text, seed=6, grid_size=3) + display(image) + +.. code:: python + + text = 'a funeral at Whole Foods' + image = model.generate_image(text, seed=10, grid_size=3) + display(image) + +.. code:: python + + text = 'Jesus turning water into wine on Americas Got Talent' + image = model.generate_image(text, seed=2, grid_size=3) + display(image) + +.. code:: python + + text = 'cctv footage of Yoda robbing a liquor store' + image = model.generate_image(text, seed=0, grid_size=3) + display(image) + +Command Line +~~~~~~~~~~~~ + +Use ``image_from_text.py`` to generate images from the command line. + +.. code:: bash + + $ python image_from_text.py --text='artificial intelligence' --seed=7 + +.. 
code:: bash + + $ python image_from_text.py --text='trail cam footage of gollum eating watermelon' --mega --seed=1 --grid-size=3 + +.. |Open In Colab| image:: https://colab.research.google.com/assets/colab-badge.svg + :target: https://colab.research.google.com/github/kuprel/min-dalle/blob/main/min_dalle.ipynb +.. |Replicate| image:: https://replicate.com/kuprel/min-dalle/badge + :target: https://replicate.com/kuprel/min-dalle +.. |Join us on Discord| image:: https://img.shields.io/discord/823813159592001537?color=5865F2&logo=discord&logoColor=white + :target: https://discord.gg/xBPBXfcFHd diff --git a/cog.yaml b/cog.yaml index 6a04d58..55d328d 100644 --- a/cog.yaml +++ b/cog.yaml @@ -6,7 +6,7 @@ build: - "libgl1-mesa-glx" - "libglib2.0-0" python_packages: - - "min-dalle==0.2.12" + - "min-dalle==0.2.13" run: - pip install torch==1.10.0+cu113 -f https://download.pytorch.org/whl/torch_stable.html diff --git a/setup.py b/setup.py index 7271488..1949018 100644 --- a/setup.py +++ b/setup.py @@ -4,8 +4,8 @@ from pathlib import Path setuptools.setup( name='min-dalle', description = 'min(DALL·E)', - long_description=(Path(__file__).parent / "README").read_text(), - version='0.2.12', + long_description=(Path(__file__).parent / "README.rst").read_text(), + version='0.2.13', author='Brett Kuprel', author_email='brkuprel@gmail.com', url='https://github.com/kuprel/min-dalle',