Spaces:
Running
Running
Linoy Tsaban
committed on
Commit
·
f06d376
1
Parent(s):
0ba4738
Update index.html
Browse files- index.html +120 -92
index.html
CHANGED
|
@@ -27,19 +27,21 @@
|
|
| 27 |
<script src="./static/js/bulma-slider.min.js"></script>
|
| 28 |
<script src="./static/js/index.js"></script>
|
| 29 |
<style>
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
| 43 |
</style>
|
| 44 |
</head>
|
| 45 |
<body>
|
|
@@ -56,12 +58,13 @@
|
|
| 56 |
|
| 57 |
|
| 58 |
<section class="hero">
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
|
|
|
| 65 |
<span class="author-block">
|
| 66 |
<a href="https://scholar.google.com/citations?user=kJ9Abf8AAAAJ&hl=en">Manuel Brack</a>¹²,
|
| 67 |
</span>
|
|
@@ -84,7 +87,7 @@
|
|
| 84 |
<a href="https://twitter.com/multimodalart">Apolinário Passos</a>⁴
|
| 85 |
</span>
|
| 86 |
<p></p>
|
| 87 |
-
|
| 88 |
<div class="is-size-5 publication-authors">
|
| 89 |
<span class="author-block">¹ German Research Center for Artificial Intelligence (DFKI),</span>
|
| 90 |
<span class="author-block">² Computer Science Department, TU Darmstadt,</span>
|
|
@@ -93,10 +96,10 @@
|
|
| 93 |
<span class="author-block">⁵ Centre for Cognitive Science, TU Darmstadt,</span>
|
| 94 |
<span class="author-block">⁶ LAION</span>
|
| 95 |
</div>
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
<a href="#"
|
| 101 |
class="external-link button is-normal is-rounded is-dark">
|
| 102 |
<span class="icon">
|
|
@@ -105,16 +108,16 @@
|
|
| 105 |
<span>arXiv</span>
|
| 106 |
</a>
|
| 107 |
</span>
|
| 108 |
-
|
| 109 |
-
|
| 110 |
<a href="https://huggingface.co/spaces/editing-images/ledtisplusplus"
|
| 111 |
target="_blank"
|
| 112 |
class="external-link button is-normal is-rounded is-dark">
|
| 113 |
<span>🤗 Demo</span>
|
| 114 |
</a>
|
| 115 |
</span>
|
| 116 |
-
|
| 117 |
-
|
| 118 |
<a href="https://huggingface.co/spaces/editing-images/ledtisplusplus/tree/main"
|
| 119 |
target="_blank"
|
| 120 |
class="external-link button is-normal is-rounded is-dark">
|
|
@@ -124,12 +127,12 @@
|
|
| 124 |
<span>Code</span>
|
| 125 |
</a>
|
| 126 |
</span>
|
| 127 |
-
|
| 128 |
-
|
|
|
|
|
|
|
| 129 |
</div>
|
| 130 |
-
|
| 131 |
-
</div>
|
| 132 |
-
</div>
|
| 133 |
</section>
|
| 134 |
|
| 135 |
<section class="hero teaser">
|
|
@@ -140,10 +143,6 @@
|
|
| 140 |
<source src="static/videos/faces.mp4"
|
| 141 |
type="video/mp4">
|
| 142 |
</video>
|
| 143 |
-
<video autoplay muted loop playsinline height="100%">
|
| 144 |
-
<source src="static/videos/objects_styles.mp4"
|
| 145 |
-
type="video/mp4">
|
| 146 |
-
</video>
|
| 147 |
|
| 148 |
|
| 149 |
<h2 class="subtitle has-text-centered">
|
|
@@ -193,10 +192,9 @@
|
|
| 193 |
</section>
|
| 194 |
|
| 195 |
|
| 196 |
-
|
| 197 |
<section class="section">
|
| 198 |
<div class="container is-max-desktop">
|
| 199 |
-
|
| 200 |
<div class="columns is-centered has-text-centered">
|
| 201 |
<h2 class="title is-3">LEDITS++: Efficient and Versatile Textual Image Editing</h2>
|
| 202 |
</div>
|
|
@@ -270,7 +268,9 @@
|
|
| 270 |
<p>
|
| 271 |
Utilizing T2I models for editing real images is usually done by inverting the sampling
|
| 272 |
process to identify a noisy xT that will be denoised to the input image x0.
|
| 273 |
-
We draw characteristics from <a href="https://inbarhub.github.io/DDPM_inversion/"
|
|
|
|
|
|
|
| 274 |
inversion method that greatly reduces the required number
|
| 275 |
of steps while maintaining no reconstruction error.
|
| 276 |
DDPM can be viewed as a first-order
|
|
@@ -283,67 +283,95 @@
|
|
| 283 |
<img src="static/images/inversion.png"/>
|
| 284 |
</div>
|
| 285 |
<div class="content">
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
<div class="content">
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
</div>
|
| 317 |
-
|
| 318 |
</div>
|
| 319 |
</div>
|
| 320 |
<div class="columns is-centered has-text-centered">
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 326 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 327 |
</div>
|
| 328 |
|
| 329 |
</section>
|
| 330 |
<section class="section">
|
| 331 |
<div class="container is-max-desktop">
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
|
| 342 |
</div>
|
| 343 |
</section>
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
<!--<section class="hero teaser">-->
|
| 348 |
<!-- <div class="container is-max-desktop">-->
|
| 349 |
<!-- <div class="hero-body">-->
|
|
@@ -359,15 +387,15 @@
|
|
| 359 |
<!-- </div>-->
|
| 360 |
<!--</section>-->
|
| 361 |
|
| 362 |
-
|
| 363 |
|
| 364 |
|
| 365 |
<section class="section" id="BibTeX">
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
}</code></pre>
|
| 370 |
-
|
| 371 |
</section>
|
| 372 |
|
| 373 |
|
|
|
|
| 27 |
<script src="./static/js/bulma-slider.min.js"></script>
|
| 28 |
<script src="./static/js/index.js"></script>
|
| 29 |
<style>
|
| 30 |
+
.publication-links a {
|
| 31 |
+
color: white !important
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
@media only screen and (max-width: 900px) {
|
| 35 |
+
.columns {
|
| 36 |
+
overflow-y: scroll;
|
| 37 |
+
}
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
@media only screen and (min-width: 901px) {
|
| 41 |
+
.is-centered img {
|
| 42 |
+
width: 80vw !important
|
| 43 |
+
}
|
| 44 |
+
}
|
| 45 |
</style>
|
| 46 |
</head>
|
| 47 |
<body>
|
|
|
|
| 58 |
|
| 59 |
|
| 60 |
<section class="hero">
|
| 61 |
+
<div class="hero-body">
|
| 62 |
+
<div class="container is-max-desktop">
|
| 63 |
+
<div class="columns is-centered">
|
| 64 |
+
<div class="column has-text-centered">
|
| 65 |
+
<h1 class="title is-1 publication-title">LEDITS++: Limitless Image Editing using Text-to-Image
|
| 66 |
+
Models</h1>
|
| 67 |
+
<div class="is-size-5 publication-authors">
|
| 68 |
<span class="author-block">
|
| 69 |
<a href="https://scholar.google.com/citations?user=kJ9Abf8AAAAJ&hl=en">Manuel Brack</a>¹²,
|
| 70 |
</span>
|
|
|
|
| 87 |
<a href="https://twitter.com/multimodalart">Apolinário Passos</a>⁴
|
| 88 |
</span>
|
| 89 |
<p></p>
|
| 90 |
+
|
| 91 |
<div class="is-size-5 publication-authors">
|
| 92 |
<span class="author-block">¹ German Research Center for Artificial Intelligence (DFKI),</span>
|
| 93 |
<span class="author-block">² Computer Science Department, TU Darmstadt,</span>
|
|
|
|
| 96 |
<span class="author-block">⁵ Centre for Cognitive Science, TU Darmstadt,</span>
|
| 97 |
<span class="author-block">⁶ LAION</span>
|
| 98 |
</div>
|
| 99 |
+
<div class="column has-text-centered">
|
| 100 |
+
<div class="publication-links">
|
| 101 |
+
<!-- arxiv Link. -->
|
| 102 |
+
<span class="link-block">
|
| 103 |
<a href="#"
|
| 104 |
class="external-link button is-normal is-rounded is-dark">
|
| 105 |
<span class="icon">
|
|
|
|
| 108 |
<span>arXiv</span>
|
| 109 |
</a>
|
| 110 |
</span>
|
| 111 |
+
<!-- Demo Link. -->
|
| 112 |
+
<span class="link-block">
|
| 113 |
<a href="https://huggingface.co/spaces/editing-images/ledtisplusplus"
|
| 114 |
target="_blank"
|
| 115 |
class="external-link button is-normal is-rounded is-dark">
|
| 116 |
<span>🤗 Demo</span>
|
| 117 |
</a>
|
| 118 |
</span>
|
| 119 |
+
<!-- Code Link. -->
|
| 120 |
+
<span class="link-block">
|
| 121 |
<a href="https://huggingface.co/spaces/editing-images/ledtisplusplus/tree/main"
|
| 122 |
target="_blank"
|
| 123 |
class="external-link button is-normal is-rounded is-dark">
|
|
|
|
| 127 |
<span>Code</span>
|
| 128 |
</a>
|
| 129 |
</span>
|
| 130 |
+
</div>
|
| 131 |
+
</div>
|
| 132 |
+
</div>
|
| 133 |
+
</div>
|
| 134 |
</div>
|
| 135 |
+
</div>
|
|
|
|
|
|
|
| 136 |
</section>
|
| 137 |
|
| 138 |
<section class="hero teaser">
|
|
|
|
| 143 |
<source src="static/videos/faces.mp4"
|
| 144 |
type="video/mp4">
|
| 145 |
</video>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
|
| 147 |
|
| 148 |
<h2 class="subtitle has-text-centered">
|
|
|
|
| 192 |
</section>
|
| 193 |
|
| 194 |
|
|
|
|
| 195 |
<section class="section">
|
| 196 |
<div class="container is-max-desktop">
|
| 197 |
+
<!-- Introduction -->
|
| 198 |
<div class="columns is-centered has-text-centered">
|
| 199 |
<h2 class="title is-3">LEDITS++: Efficient and Versatile Textual Image Editing</h2>
|
| 200 |
</div>
|
|
|
|
| 268 |
<p>
|
| 269 |
Utilizing T2I models for editing real images is usually done by inverting the sampling
|
| 270 |
process to identify a noisy xT that will be denoised to the input image x0.
|
| 271 |
+
We draw characteristics from <a href="https://inbarhub.github.io/DDPM_inversion/"
|
| 272 |
+
target="_blank">edit friendly DDPM inversion</a> and propose
|
| 273 |
+
an efficient
|
| 274 |
inversion method that greatly reduces the required number
|
| 275 |
of steps while maintaining no reconstruction error.
|
| 276 |
DDPM can be viewed as a first-order
|
|
|
|
| 283 |
<img src="static/images/inversion.png"/>
|
| 284 |
</div>
|
| 285 |
<div class="content">
|
| 286 |
+
<h2 class="title is-4">Component 2: Textual Editing</h2>
|
| 287 |
+
<p>
|
| 288 |
+
After creating our re-construction sequence, we can edit the image by manipulating
|
| 289 |
+
the noise estimate εθ based on a set of edit instructions. We devise a dedicated
|
| 290 |
+
guidance term for each concept based on conditioned and unconditioned estimate. We
|
| 291 |
+
define LEDITS++ guidance such that it both reflects the direction of the edit (if we
|
| 292 |
+
want
|
| 293 |
+
to push away from/towards the edit concept) and maximizes fine-grained control over
|
| 294 |
+
the effect of the desired edit.
|
| 295 |
+
|
| 296 |
+
</p>
|
| 297 |
+
<img src="static/images/textual_editing.png"/>
|
| 298 |
+
</div>
|
| 299 |
<div class="content">
|
| 300 |
+
<h2 class="title is-4">Component 3: Semantic Grounding</h2>
|
| 301 |
+
<p>
|
| 302 |
+
In our defined LEDITS++ guidance, we include a masking term composed of the
|
| 303 |
+
intersection between the mask generated from
|
| 304 |
+
the U-Net’s cross-attention layers and a mask derived from
|
| 305 |
+
the noise estimate - yielding a mask both focused on relevant image
|
| 306 |
+
regions and of fine granularity.
|
| 307 |
+
We empirically demonstrate that these maps can also capture regions
|
| 308 |
+
of an image relevant to an editing concept that is not already present.
|
| 309 |
+
Specifically for multiple edits, calculating a
|
| 310 |
+
dedicated mask for each edit prompt ensures that the corresponding
|
| 311 |
+
guidance terms remain largely isolated, limiting
|
| 312 |
+
interference between them.
|
| 313 |
+
|
| 314 |
+
</p>
|
| 315 |
+
|
| 316 |
</div>
|
| 317 |
+
|
| 318 |
</div>
|
| 319 |
</div>
|
| 320 |
<div class="columns is-centered has-text-centered">
|
| 321 |
+
<img
|
| 322 |
+
style="max-height:800px; max-width:800px"
|
| 323 |
+
src="static/images/semantic_grounding.png"
|
| 324 |
+
/>
|
| 325 |
+
</div>
|
| 326 |
+
|
| 327 |
+
<div class="columns is-centered has-text-centered">
|
| 328 |
+
<h2 class="title is-3">Properties of LEDITS++
|
| 329 |
+
</h2>
|
| 330 |
+
</div>
|
| 331 |
+
<div class="columns is-centered has-text-centered">
|
| 332 |
+
<div class="column">
|
| 333 |
+
<p>
|
| 334 |
+
Efficiency.
|
| 335 |
+
</p>
|
| 336 |
+
</div>
|
| 337 |
+
<div class="column">
|
| 338 |
+
<p>
|
| 339 |
+
Versatility.
|
| 340 |
+
</p>
|
| 341 |
+
</div>
|
| 342 |
+
<div class="column">
|
| 343 |
+
<p>
|
| 344 |
+
Precision.
|
| 345 |
+
</p>
|
| 346 |
+
</div>
|
| 347 |
+
</div>
|
| 348 |
+
|
| 349 |
</div>
|
| 350 |
+
<video autoplay muted loop playsinline height="100%">
|
| 351 |
+
<source src="static/videos/objects_styles.mp4"
|
| 352 |
+
type="video/mp4">
|
| 353 |
+
</video>
|
| 354 |
+
|
| 355 |
</div>
|
| 356 |
|
| 357 |
</section>
|
| 358 |
<section class="section">
|
| 359 |
<div class="container is-max-desktop">
|
| 360 |
+
<div class="columns is-centered has-text-centered">
|
| 361 |
+
<h2 class="title is-3">Interactive Demo</h2>
|
| 362 |
+
</div>
|
| 363 |
+
<script
|
| 364 |
+
type="module"
|
| 365 |
+
src="https://gradio.s3-us-west-2.amazonaws.com/3.43.0/gradio.js"
|
| 366 |
+
></script>
|
| 367 |
+
|
| 368 |
+
<gradio-app src="https://editing-images-ledtisplusplus.hf.space"></gradio-app>
|
| 369 |
|
| 370 |
</div>
|
| 371 |
</section>
|
| 372 |
+
|
| 373 |
+
|
| 374 |
+
<!-- portraits video -->
|
| 375 |
<!--<section class="hero teaser">-->
|
| 376 |
<!-- <div class="container is-max-desktop">-->
|
| 377 |
<!-- <div class="hero-body">-->
|
|
|
|
| 387 |
<!-- </div>-->
|
| 388 |
<!--</section>-->
|
| 389 |
|
| 390 |
+
<!-- 3 key observations -->
|
| 391 |
|
| 392 |
|
| 393 |
<section class="section" id="BibTeX">
|
| 394 |
+
<div class="container is-max-desktop content">
|
| 395 |
+
<h2 class="title">BibTeX</h2>
|
| 396 |
+
<pre><code>@article{
|
| 397 |
}</code></pre>
|
| 398 |
+
</div>
|
| 399 |
</section>
|
| 400 |
|
| 401 |
|