ruslanmv committed
Commit 4e09337 · 1 Parent(s): 71538ab

First commit

Files changed (4):
  1. .vscode/settings.json +1 -1
  2. README.md +38 -2
  3. app.py +110 -212
  4. requirements.txt +11 -0
.vscode/settings.json CHANGED
@@ -10,7 +10,7 @@
   "[python]": {
     "editor.formatOnType": true,
     "editor.codeActionsOnSave": {
-      "source.organizeImports": true
+      "source.organizeImports": "explicit"
     }
   },
   "editor.formatOnSave": true,
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🌍
 colorFrom: gray
 colorTo: blue
 sdk: gradio
-sdk_version: 4.37.1
+sdk_version: 3.39.0
 app_file: app.py
 license: mit
 pinned: false
@@ -13,4 +13,40 @@ duplicated_from: hysts/SD-XL
 load_balancing_strategy: random
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# AI Fast Image Server
+
+A lightweight Gradio app that serves fast **text-to-image** generation using either:
+
+- **SDXL Base 1.0 + LCM** (default), or
+- **SSD-1B + LCM LoRA** (enable via a flag in `app.py`)
+
+The app targets **very few inference steps** (e.g., 4) for speed while keeping good image quality. It falls back to **CPU** automatically if CUDA isn’t available.
+
+---
+
+## Features
+
+- ⚡ **Fast sampling** with **LCM** schedulers
+- 🔁 **Deterministic** results via seed
+- 🖥️ **Auto GPU/CPU** selection (no brittle `nvidia-smi` checks)
+- 🔐 Optional **secret token** gate to prevent abuse
+- 🧩 Switch between **SDXL** and **SSD-1B+LCM LoRA** with a flag
+
+---
+
+## Requirements
+
+Dependencies are pinned for compatibility (notably `diffusers==0.23.0` + `huggingface_hub==0.14.1`):
+
+```txt
+accelerate==0.24.1
+diffusers==0.23.0
+gradio==3.39.0
+huggingface_hub==0.14.1
+invisible-watermark==0.2.0
+Pillow==10.1.0
+torch==2.1.0
+transformers==4.35.0
+safetensors==0.4.0
+numpy>=1.23
+ipython
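
For the secret-token gate and programmatic use described in the new README, below is a minimal client sketch. It assumes the Space is deployed with `run_api = True` in `app.py` (so the `run` endpoint is exposed); the Space id and token value are placeholders.

```python
# Hypothetical client sketch: call the Space's /run endpoint via gradio_client.
# "your-username/ai-fast-image-server" is a placeholder Space id; the last
# positional argument must match the SECRET_TOKEN env var set on the server.
from gradio_client import Client

client = Client("your-username/ai-fast-image-server")  # placeholder
image_path = client.predict(
    "A watercolor landscape, mountains at dawn",  # prompt
    "",     # negative_prompt
    0,      # seed
    1024,   # width
    1024,   # height
    0.0,    # guidance_scale
    4,      # num_inference_steps
    "default_secret",  # secret_token
    api_name="/run",
)
print(image_path)  # local path of the downloaded result image
```

The positional arguments follow the `inputs` list wired to `prompt.submit(..., api_name="run")` in `app.py`.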
app.py CHANGED
@@ -1,122 +1,106 @@
+# ---- Flags ----
 run_api = False
 SSD_1B = False
-import os
-
-# Use GPU
-gpu_info = os.popen("nvidia-smi").read()
-if "failed" in gpu_info:
-    print("Not connected to a GPU")
-    is_gpu = False
-else:
-    print(gpu_info)
-    is_gpu = True
-print(is_gpu)
-
 
+# ---- Standard imports ----
+import os
+import subprocess
+import numpy as np
 from IPython.display import clear_output
 
-
-def check_enviroment():
+# ---- Minimal, deterministic env bootstrap (optional) ----
+# Prefer pinning in requirements.txt instead of installing here.
+def check_environment():
     try:
-        import torch
-
-        print("Enviroment is already installed.")
+        import torch  # noqa: F401
+        print("Environment is already installed.")
     except ImportError:
-        print("Enviroment not found. Installing...")
-        # Install requirements from requirements.txt
-        os.system("pip install -r requirements.txt")
-        # Install gradio version 3.48.0
-        os.system("pip install gradio==3.39.0")
-        # Install python-dotenv
-        os.system("pip install python-dotenv")
-        # Clear the output
+        print("Environment not found. Installing pinned dependencies...")
+        # Strongly prefer doing this via requirements.txt at build time.
+        os.system("pip install --upgrade pip")
+        os.system("pip install diffusers==0.30.0 transformers>=4.41.0 accelerate>=0.31.0 huggingface_hub>=0.23.4 safetensors>=0.4.2 gradio==4.37.1 python-dotenv")
         clear_output()
+        print("Environment installed successfully.")
 
-        print("Enviroment installed successfully.")
-
-
-# Call the function to check and install Packages if necessary
-check_enviroment()
-
+check_environment()
 
-from IPython.display import clear_output
-import os
-import gradio as gr
-import numpy as np
-import PIL
-import base64
-import io
+# ---- App imports (safe after environment check) ----
 import torch
+import gradio as gr
+from PIL import Image
 from diffusers import UNet2DConditionModel, DiffusionPipeline, LCMScheduler
 
-# SDXL
-from diffusers import UNet2DConditionModel, DiffusionPipeline, LCMScheduler
+# Optional: only imported if SSD_1B=True
+# from diffusers import AutoPipelineForText2Image
 
-# Get the current directory
+# ---- Config / constants ----
 current_dir = os.getcwd()
-model_path = os.path.join(current_dir)
-# Set the cache path
 cache_path = os.path.join(current_dir, "cache")
+os.makedirs(cache_path, exist_ok=True)
+
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "1024"))
 SECRET_TOKEN = os.getenv("SECRET_TOKEN", "default_secret")
 
-# Uncomment the following line if you are using PyTorch 1.10 or later
-# os.environ["TORCH_USE_CUDA_DSA"] = "1"
-
-if is_gpu:
-    # Uncomment the following line if you want to enable CUDA launch blocking
-    os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
-else:
-    # Uncomment the following line if you want to use CPU instead of GPU
-    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-
-
-# Get the current directory
-current_dir = os.getcwd()
-model_path = os.path.join(current_dir)
-
-# Set the cache path
-cache_path = os.path.join(current_dir, "cache")
-
+# ---- GPU / NVML detection (robust) ----
+def print_nvidia_smi():
+    try:
+        proc = subprocess.run(["nvidia-smi"], capture_output=True, text=True)
+        if proc.returncode == 0:
+            print(proc.stdout)
+        else:
+            # Show the stderr to aid debugging, but don't trust it for logic
+            print(proc.stderr or "nvidia-smi returned a non-zero exit code.")
+    except FileNotFoundError:
+        print("nvidia-smi not found on PATH.")
+
+print_nvidia_smi()
+
+is_gpu = torch.cuda.is_available()
+print(f"CUDA available: {is_gpu}")
+
+# dtype & device
+dtype = torch.float16 if is_gpu else torch.float32
+device = torch.device("cuda") if is_gpu else torch.device("cpu")
+
+# Optional: fewer surprises when CUDA is flaky
+if not is_gpu:
+    # Avoid cuda-related env flags when no GPU
+    os.environ.pop("CUDA_LAUNCH_BLOCKING", None)
+
+# ---- Pipeline setup ----
 if not SSD_1B:
-
+    # SDXL base + LCM UNet
     unet = UNet2DConditionModel.from_pretrained(
         "latent-consistency/lcm-sdxl",
-        torch_dtype=torch.float16,
-        variant="fp16",
+        torch_dtype=dtype,
+        variant="fp16" if is_gpu else None,
         cache_dir=cache_path,
     )
     pipe = DiffusionPipeline.from_pretrained(
         "stabilityai/stable-diffusion-xl-base-1.0",
         unet=unet,
-        torch_dtype=torch.float16,
-        variant="fp16",
+        torch_dtype=dtype,
+        variant="fp16" if is_gpu else None,
        cache_dir=cache_path,
    )
-
     pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
-    if torch.cuda.is_available():
-        pipe.to("cuda")
+    pipe.to(device)
 else:
-    # SSD-1B
-    from diffusers import LCMScheduler, AutoPipelineForText2Image
-
+    # SSD-1B + LCM LoRA
+    from diffusers import AutoPipelineForText2Image  # local import
     pipe = AutoPipelineForText2Image.from_pretrained(
         "segmind/SSD-1B",
-        torch_dtype=torch.float16,
-        variant="fp16",
+        torch_dtype=dtype,
+        variant="fp16" if is_gpu else None,
         cache_dir=cache_path,
     )
     pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
-    if torch.cuda.is_available():
-        pipe.to("cuda")
-
-    # load and fuse
+    pipe.to(device)
     pipe.load_lora_weights("latent-consistency/lcm-lora-ssd-1b")
     pipe.fuse_lora()
 
-
+# ---- Core generate function ----
 def generate(
     prompt: str,
     negative_prompt: str = "",
@@ -126,15 +110,18 @@ def generate(
     guidance_scale: float = 0.0,
     num_inference_steps: int = 4,
     secret_token: str = "",
-) -> PIL.Image.Image:
+) -> Image.Image:
     if secret_token != SECRET_TOKEN:
-        raise gr.Error(
-            f"Invalid secret token. Please fork the original space if you want to use it for yourself."
-        )
+        raise gr.Error("Invalid secret token. Set SECRET_TOKEN on the server or pass the correct token.")
+    # Make sure sizes are sane on CPU
+    width = int(np.clip(width, 256, MAX_IMAGE_SIZE))
+    height = int(np.clip(height, 256, MAX_IMAGE_SIZE))
 
-    generator = torch.Generator().manual_seed(seed)
+    generator = torch.Generator(device=device)
+    if seed is not None:
+        generator = generator.manual_seed(int(seed))
 
-    image = pipe(
+    out = pipe(
         prompt=prompt,
         negative_prompt=negative_prompt,
         width=width,
@@ -143,18 +130,14 @@
         num_inference_steps=num_inference_steps,
         generator=generator,
         output_type="pil",
-    ).images[0]
-    return image
-
+    )
+    return out.images[0]
 
 clear_output()
 
-from IPython.display import display
-
-
-def generate_image(prompt="A beautiful and sexy girl"):
-    # Generate the image using the prompt
-    generated_image = generate(
+# ---- Optional notebook helper ----
+def generate_image(prompt="A scenic watercolor landscape, mountains at dawn"):
+    img = generate(
         prompt=prompt,
         negative_prompt="",
         seed=0,
@@ -162,145 +145,60 @@ def generate_image(prompt="A beautiful and sexy girl"):
         height=1024,
         guidance_scale=0.0,
         num_inference_steps=4,
-        secret_token="default_secret",  # Replace with your secret token
+        secret_token=SECRET_TOKEN,
     )
-    # Display the image in the Jupyter Notebook
-    display(generated_image)
-
+    from IPython.display import display
+    display(img)
 
+# ---- UI ----
 if not run_api:
-    secret_token = gr.Text(
+    secret_token = gr.Textbox(
         label="Secret Token",
-        max_lines=1,
         placeholder="Enter your secret token",
+        type="password",
     )
-    prompt = gr.Text(
+    prompt = gr.Textbox(
         label="Prompt",
-        show_label=False,
-        max_lines=1,
+        show_label=True,
+        max_lines=2,
         placeholder="Enter your prompt",
-        container=False,
     )
-    result = gr.Image(label="Result", show_label=False)
-    negative_prompt = gr.Text(
+    negative_prompt = gr.Textbox(
        label="Negative prompt",
-        max_lines=1,
-        placeholder="Enter a negative prompt",
-        visible=True,
+        max_lines=2,
+        placeholder="Enter a negative prompt (optional)",
    )
     seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
+    width = gr.Slider(label="Width", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=1024)
+    height = gr.Slider(label="Height", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=1024)
+    guidance_scale = gr.Slider(label="Guidance scale", minimum=0, maximum=2, step=0.1, value=0.0)
+    num_inference_steps = gr.Slider(label="Inference steps", minimum=1, maximum=8, step=1, value=4)
 
-    width = gr.Slider(
-        label="Width",
-        minimum=256,
-        maximum=MAX_IMAGE_SIZE,
-        step=32,
-        value=1024,
-    )
-    height = gr.Slider(
-        label="Height",
-        minimum=256,
-        maximum=MAX_IMAGE_SIZE,
-        step=32,
-        value=1024,
-    )
-    guidance_scale = gr.Slider(
-        label="Guidance scale", minimum=0, maximum=2, step=0.1, value=0.0
-    )
-    num_inference_steps = gr.Slider(
-        label="Number of inference steps", minimum=1, maximum=8, step=1, value=4
-    )
-    inputs = [
-        prompt,
-        negative_prompt,
-        seed,
-        width,
-        height,
-        guidance_scale,
-        num_inference_steps,
-        secret_token,
-    ]
     iface = gr.Interface(
         fn=generate,
-        inputs=inputs,
-        outputs=result,
-        title="Image Generator",
-        description="Generate images based on prompts.",
+        inputs=[prompt, negative_prompt, seed, width, height, guidance_scale, num_inference_steps, secret_token],
+        outputs=gr.Image(label="Result"),
+        title="Image Generator (LCM)",
+        description="Fast SDXL/SSD-1B image generation with LCM. Uses CPU if CUDA is unavailable.",
    )
-
     iface.launch()
 
-
 if run_api:
     with gr.Blocks() as demo:
-        gr.HTML(
-            """
-        <div style="z-index: 100; position: fixed; top: 0px; right: 0px; left: 0px; bottom: 0px; width: 100%; height: 100%; background: white; display: flex; align-items: center; justify-content: center; color: black;">
-        <div style="text-align: center; color: black;">
-        <p style="color: black;">This space is a REST API to programmatically generate images using LCM LoRA SSD-1B.</p>
-        <p style="color: black;">It is not meant to be directly used through a user interface, but using code and an access key.</p>
-        </div>
-        </div>"""
-        )
-        secret_token = gr.Text(
-            label="Secret Token",
-            max_lines=1,
-            placeholder="Enter your secret token",
-        )
-        prompt = gr.Text(
-            label="Prompt",
-            show_label=False,
-            max_lines=1,
-            placeholder="Enter your prompt",
-            container=False,
-        )
-        result = gr.Image(label="Result", show_label=False)
-        negative_prompt = gr.Text(
-            label="Negative prompt",
-            max_lines=1,
-            placeholder="Enter a negative prompt",
-            visible=True,
+        gr.Markdown(
+            "### REST API for LCM Text-to-Image\n"
+            "Use the `/run` endpoint programmatically with your secret."
        )
+        secret_token = gr.Textbox(label="Secret Token", type="password")
+        prompt = gr.Textbox(label="Prompt")
+        negative_prompt = gr.Textbox(label="Negative prompt")
        seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
+        width = gr.Slider(label="Width", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=1024)
+        height = gr.Slider(label="Height", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=1024)
+        guidance_scale = gr.Slider(label="Guidance scale", minimum=0, maximum=2, step=0.1, value=0.0)
+        num_inference_steps = gr.Slider(label="Inference steps", minimum=1, maximum=8, step=1, value=4)
 
-        width = gr.Slider(
-            label="Width",
-            minimum=256,
-            maximum=MAX_IMAGE_SIZE,
-            step=32,
-            value=1024,
-        )
-        height = gr.Slider(
-            label="Height",
-            minimum=256,
-            maximum=MAX_IMAGE_SIZE,
-            step=32,
-            value=1024,
-        )
-        guidance_scale = gr.Slider(
-            label="Guidance scale", minimum=0, maximum=2, step=0.1, value=0.0
-        )
-        num_inference_steps = gr.Slider(
-            label="Number of inference steps", minimum=1, maximum=8, step=1, value=4
-        )
-
-        inputs = [
-            prompt,
-            negative_prompt,
-            seed,
-            width,
-            height,
-            guidance_scale,
-            num_inference_steps,
-            secret_token,
-        ]
-        prompt.submit(
-            fn=generate,
-            inputs=inputs,
-            outputs=result,
-            api_name="run",
-        )
+        inputs = [prompt, negative_prompt, seed, width, height, guidance_scale, num_inference_steps, secret_token]
+        prompt.submit(fn=generate, inputs=inputs, outputs=gr.Image(), api_name="run")
 
-    # demo.queue(max_size=32).launch()
-    # Launch the Gradio app with multiple workers and debug mode enabled
-    demo.queue(max_size=32).launch(debug=True)
+    demo.queue(max_size=32).launch(debug=False)
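
One caveat with the optional bootstrap in the new `app.py`: the pinned specifiers are passed to `os.system` unquoted, so `>=` is interpreted by the shell as output redirection. A safer sketch (same pins as in the diff, shown only as an illustration) avoids the shell entirely:

```python
# Sketch: install the same pins without invoking a shell, so specifiers such as
# "transformers>=4.41.0" are passed through literally instead of being treated
# as redirections. Prefer requirements.txt at build time regardless.
import subprocess
import sys

PINS = [
    "diffusers==0.30.0",
    "transformers>=4.41.0",
    "accelerate>=0.31.0",
    "huggingface_hub>=0.23.4",
    "safetensors>=0.4.2",
    "gradio==4.37.1",
    "python-dotenv",
]

subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "pip"])
subprocess.check_call([sys.executable, "-m", "pip", "install", *PINS])
```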
 
 
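Because `generate()` seeds a `torch.Generator` explicitly, repeat calls with the same arguments should reproduce the same image on the same device and dtype. A small check, assuming `generate` and `SECRET_TOKEN` from `app.py` are in scope:

```python
# Determinism sketch: two runs with identical arguments and seed should match
# pixel-for-pixel on the same hardware/dtype (minor nondeterminism is still
# possible with some GPU kernels).
import numpy as np

kwargs = dict(
    prompt="A lighthouse on a rocky coast at sunset",
    negative_prompt="",
    seed=123,
    width=768,
    height=768,
    guidance_scale=0.0,
    num_inference_steps=4,
    secret_token=SECRET_TOKEN,
)

a = np.asarray(generate(**kwargs))
b = np.asarray(generate(**kwargs))
print("identical:", np.array_equal(a, b))
```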
requirements.txt CHANGED
@@ -1,3 +1,4 @@
+# Core stack (kept at your versions)
 accelerate==0.24.1
 diffusers==0.23.0
 gradio==3.39.0
@@ -6,3 +7,13 @@ Pillow==10.1.0
 torch==2.1.0
 transformers==4.35.0
 ipython
+
+# Must-add pins for compatibility
+# diffusers==0.23.0 expects `cached_download` to exist in huggingface_hub
+huggingface_hub==0.14.1
+
+# Recommended: used by diffusers/transformers when loading weights
+safetensors==0.4.0
+
+# Your code imports numpy directly
+numpy>=1.23
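
The `huggingface_hub==0.14.1` pin exists because `diffusers==0.23.0` still imports `cached_download` from `huggingface_hub`, a function later releases removed. A quick sanity check after installing:

```python
# Compatibility sketch: this import succeeds on huggingface_hub==0.14.1 but
# raises ImportError on recent releases where cached_download was removed,
# which would break diffusers==0.23.0 at import time.
from huggingface_hub import cached_download  # noqa: F401
import diffusers

print("diffusers", diffusers.__version__, "imports cleanly with the pinned hub client")
```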