|
26 | 26 | }, |
27 | 27 | { |
28 | 28 | "cell_type": "code", |
29 | | - "execution_count": null, |
| 29 | + "execution_count": 1, |
30 | 30 | "metadata": {}, |
31 | | - "outputs": [], |
| 31 | + "outputs": [ |
| 32 | + { |
| 33 | + "name": "stdout", |
| 34 | + "output_type": "stream", |
| 35 | + "text": [ |
| 36 | + "Using device: cuda\n" |
| 37 | + ] |
| 38 | + } |
| 39 | + ], |
32 | 40 | "source": [ |
33 | 41 | "import os\n", |
34 | 42 | "import time\n", |
|
60 | 68 | }, |
61 | 69 | { |
62 | 70 | "cell_type": "code", |
63 | | - "execution_count": null, |
| 71 | + "execution_count": 2, |
64 | 72 | "metadata": {}, |
65 | | - "outputs": [], |
| 73 | + "outputs": [ |
| 74 | + { |
| 75 | + "name": "stdout", |
| 76 | + "output_type": "stream", |
| 77 | + "text": [ |
| 78 | + "Dataset size: 1000\n" |
| 79 | + ] |
| 80 | + } |
| 81 | + ], |
66 | 82 | "source": [ |
67 | 83 | "# Our model\n", |
68 | 84 | "class UltraLightModel(nn.Module):\n", |
|
136 | 152 | }, |
137 | 153 | { |
138 | 154 | "cell_type": "code", |
139 | | - "execution_count": null, |
| 155 | + "execution_count": 3, |
140 | 156 | "metadata": {}, |
141 | 157 | "outputs": [], |
142 | 158 | "source": [ |
|
176 | 192 | }, |
177 | 193 | { |
178 | 194 | "cell_type": "code", |
179 | | - "execution_count": null, |
| 195 | + "execution_count": 4, |
180 | 196 | "metadata": {}, |
181 | | - "outputs": [], |
| 197 | + "outputs": [ |
| 198 | + { |
| 199 | + "name": "stdout", |
| 200 | + "output_type": "stream", |
| 201 | + "text": [ |
| 202 | + "Baseline Training (Real Data Loading)\n" |
| 203 | + ] |
| 204 | + }, |
| 205 | + { |
| 206 | + "name": "stderr", |
| 207 | + "output_type": "stream", |
| 208 | + "text": [ |
| 209 | + "Epoch 0: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:03<00:00, 17.77it/s, Time=3.5s]\n" |
| 210 | + ] |
| 211 | + }, |
| 212 | + { |
| 213 | + "name": "stdout", |
| 214 | + "output_type": "stream", |
| 215 | + "text": [ |
| 216 | + "Epoch 0 - Time: 3.55s\n" |
| 217 | + ] |
| 218 | + }, |
| 219 | + { |
| 220 | + "name": "stderr", |
| 221 | + "output_type": "stream", |
| 222 | + "text": [ |
| 223 | + "Epoch 1: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:03<00:00, 17.90it/s, Time=3.5s]" |
| 224 | + ] |
| 225 | + }, |
| 226 | + { |
| 227 | + "name": "stdout", |
| 228 | + "output_type": "stream", |
| 229 | + "text": [ |
| 230 | + "Epoch 1 - Time: 3.52s\n", |
| 231 | + "Baseline average epoch time: 3.53s\n" |
| 232 | + ] |
| 233 | + }, |
| 234 | + { |
| 235 | + "name": "stderr", |
| 236 | + "output_type": "stream", |
| 237 | + "text": [ |
| 238 | + "\n" |
| 239 | + ] |
| 240 | + } |
| 241 | + ], |
182 | 242 | "source": [ |
183 | 243 | "# Your existing training setup\n", |
184 | 244 | "model = UltraLightModel(num_classes=1000).to(device)\n", |
|
209 | 269 | }, |
210 | 270 | { |
211 | 271 | "cell_type": "code", |
212 | | - "execution_count": null, |
| 272 | + "execution_count": 5, |
213 | 273 | "metadata": {}, |
214 | 274 | "outputs": [], |
215 | 275 | "source": [ |
|
228 | 288 | }, |
229 | 289 | { |
230 | 290 | "cell_type": "code", |
231 | | - "execution_count": null, |
| 291 | + "execution_count": 6, |
232 | 292 | "metadata": {}, |
233 | | - "outputs": [], |
| 293 | + "outputs": [ |
| 294 | + { |
| 295 | + "name": "stdout", |
| 296 | + "output_type": "stream", |
| 297 | + "text": [ |
| 298 | + "No-Overhead Training (Cached Data Loading)\n" |
| 299 | + ] |
| 300 | + }, |
| 301 | + { |
| 302 | + "name": "stderr", |
| 303 | + "output_type": "stream", |
| 304 | + "text": [ |
| 305 | + "Epoch 0: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:00<00:00, 65.49it/s, Time=1.0s]\n" |
| 306 | + ] |
| 307 | + }, |
| 308 | + { |
| 309 | + "name": "stdout", |
| 310 | + "output_type": "stream", |
| 311 | + "text": [ |
| 312 | + "Epoch 0 - Time: 0.96s\n" |
| 313 | + ] |
| 314 | + }, |
| 315 | + { |
| 316 | + "name": "stderr", |
| 317 | + "output_type": "stream", |
| 318 | + "text": [ |
| 319 | + "Epoch 1: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:00<00:00, 64.57it/s, Time=1.0s]" |
| 320 | + ] |
| 321 | + }, |
| 322 | + { |
| 323 | + "name": "stdout", |
| 324 | + "output_type": "stream", |
| 325 | + "text": [ |
| 326 | + "Epoch 1 - Time: 0.98s\n", |
| 327 | + "No-Overhead average epoch time: 0.97s\n" |
| 328 | + ] |
| 329 | + }, |
| 330 | + { |
| 331 | + "name": "stderr", |
| 332 | + "output_type": "stream", |
| 333 | + "text": [ |
| 334 | + "\n" |
| 335 | + ] |
| 336 | + } |
| 337 | + ], |
234 | 338 | "source": [ |
235 | 339 | "# Train with the same setup, just different dataloader\n", |
236 | 340 | "print(\"No-Overhead Training (Cached Data Loading)\")\n", |
|
256 | 360 | }, |
257 | 361 | { |
258 | 362 | "cell_type": "code", |
259 | | - "execution_count": null, |
| 363 | + "execution_count": 7, |
260 | 364 | "metadata": {}, |
261 | | - "outputs": [], |
| 365 | + "outputs": [ |
| 366 | + { |
| 367 | + "name": "stdout", |
| 368 | + "output_type": "stream", |
| 369 | + "text": [ |
| 370 | + "\n", |
| 371 | + "Performance Comparison:\n", |
| 372 | + "Baseline: 3.53s per epoch\n", |
| 373 | + "No-Overhead: 0.97s per epoch\n", |
| 374 | + "Speedup: 3.64x\n", |
| 375 | + "Time saved: 2.56s per epoch (72.6%)\n", |
| 376 | + "\n", |
| 377 | + "*** DATA LOADING BOTTLENECK DETECTED ***\n", |
| 378 | + "You could speed up training by 72.6% by optimizing data loading.\n" |
| 379 | + ] |
| 380 | + } |
| 381 | + ], |
262 | 382 | "source": [ |
263 | 383 | "# Compare performance\n", |
264 | 384 | "speedup = baseline_avg_time / sol_avg_time\n", |
|
322 | 442 | "name": "python", |
323 | 443 | "nbconvert_exporter": "python", |
324 | 444 | "pygments_lexer": "ipython3", |
325 | | - "version": "3.12.3" |
| 445 | + "version": "3.10.19" |
326 | 446 | } |
327 | 447 | }, |
328 | 448 | "nbformat": 4, |
|
0 commit comments