
/// D translation of stb_image-1.33 (http://nothings.org/stb_image.c)
///
/// This port only supports:
/// $(UL
///   $(LI PNG 8-bit-per-channel only.)
///   $(LI JPEG baseline (no JPEG progressive).)
///   $(LI GIF.)
///   $(LI BMP non-1bpp, non-RLE.)
/// )
///
/// TODO:
/// $(UL
///   $(LI Support a range as input.)
/// )

//============================    Contributors    =========================
//
// Image formats                                Optimizations & bugfixes
//    Sean Barrett (jpeg, png, bmp)                Fabian "ryg" Giesen
//    Nicolas Schulz (hdr, psd)
//    Jonathan Dummer (tga)                     Bug fixes & warning fixes
//    Jean-Marc Lienher (gif)                      Marc LeBlanc
//    Tom Seddon (pic)                             Christpher Lloyd
//    Thatcher Ulrich (psd)                        Dave Moore
//                                                 Won Chun
//                                                 the Horde3D community
// Extensions, features                            Janez Zemva
//    Jetro Lauha (stbi_info)                      Jonathan Blow
//    James "moose2000" Brown (iPhone PNG)         Laurent Gomila
//    Ben "Disch" Wenger (io callbacks)            Aruelien Pocheville
//    Martin "SpartanJ" Golini                     Ryamond Barbiero
//                                                 David Woo

module gfm.image.stb_image;

// C runtime: malloc/free and memcpy/memset are used by the decoders below.
import core.stdc.stdlib;
import core.stdc.string;

import gfm.math.vector,
       gfm.image.bitmap;

import ae.utils.graphics.image;

enum STBI_VERSION = 1;

/// The exception type thrown when loading an image failed.
class STBImageException : Exception
{
    public
    {
        // Forwards every argument unchanged to the Exception constructor.
        @safe pure nothrow this(string message, string file =__FILE__, size_t line = __LINE__, Throwable next = null)
        {
            super(message, file, line, next);
        }
    }
}

// Component-count selectors.
enum : int
{
    STBI_default    = 0, // only used for req_comp
    STBI_grey       = 1,
    STBI_grey_alpha = 2,
    STBI_rgb        = 3,
    STBI_rgb_alpha  = 4
};

// define faster low-level operations (typically SIMD support)


// Rotates x left by y bits.
// y is reduced modulo 32 first so that neither shift is ever performed by the
// full word width; for y in 1..31 the result is the plain rotate.
uint stbi_lrot(uint x, uint y)
{
    y &= 31;
    return (x << y) | (x >> ((32 - y) & 31));
}

// stbi structure is our basic context used by all images, so it
// contains all the IO context, plus some basic image information
struct stbi
{
    uint img_x, img_y;
    int img_n, img_out_n;

    int buflen;
    ubyte[128] buffer_start;

    const(ubyte) *img_buffer;          // current read position
    const(ubyte) *img_buffer_end;      // one past the last readable byte
    const(ubyte) *img_buffer_original; // start of the buffer, used by stbi_rewind
}


// initialize a memory-decode context
// Points the three cursor fields of s at the `len` bytes starting at `buffer`.
void start_mem(stbi *s, const(ubyte)*buffer, int len)
{
    s.img_buffer = buffer;
    s.img_buffer_original = buffer;
    s.img_buffer_end = buffer+len;
}

void stbi_rewind(stbi *s)
{
    // conceptually rewind SHOULD rewind to the beginning of the stream,
    // but we just rewind to the beginning of the initial buffer, because
    // we only use it after doing 'test', which only ever looks at most 92 bytes
    s.img_buffer = s.img_buffer_original;
}


// Detects the format of the data in s and decodes it.
//
// Formats are tried in the order JPEG, PNG, BMP, GIF. For each one the
// `*_test` probe and the `*_load` call share a single try block, so an
// STBImageException raised by either of them makes this function rewind the
// stream and move on to the next format.
//
// Returns: whatever the matching `*_load` function returns.
// Throws: STBImageException once all four attempts have failed.
ubyte *stbi_load_main(stbi *s, int *x, int *y, int *comp, int req_comp)
{
    try
    {
        stbi_jpeg_test(s);
        stbi_rewind(s);
        return stbi_jpeg_load(s,x,y,comp,req_comp);
    }
    catch(STBImageException e)
    {
        stbi_rewind(s);
    }

    try
    {
        stbi_png_test(s);
        stbi_rewind(s);
        return stbi_png_load(s,x,y,comp,req_comp);
    }
    catch(STBImageException e)
    {
        stbi_rewind(s);
    }

    try
    {
        stbi_bmp_test(s);
        stbi_rewind(s);
        return stbi_bmp_load(s,x,y,comp,req_comp);
    }
    catch(STBImageException e)
    {
        stbi_rewind(s);
    }

    try
    {
        stbi_gif_test(s);
        stbi_rewind(s);
        return stbi_gif_load(s,x,y,comp,req_comp);
    }
    catch(STBImageException e)
    {
        stbi_rewind(s);
    }

    throw new STBImageException("Image not of any known type, or corrupt");
}

/// Loads an image from memory.
/// Params:
///     buffer = the encoded image file contents.
///     width = receives the value the decoder stores through its `x` pointer.
///     height = receives the value the decoder stores through its `y` pointer.
///     components = receives the value the decoder stores through its `comp` pointer.
///     requestedComponents = forwarded to the decoder as `req_comp`.
/// Returns: the pointer produced by the decoder; release it with stbi_image_free.
/// Throws: STBImageException on error.
ubyte* stbi_load_from_memory(void[] buffer, out int width, out int height, out int components, int requestedComponents)
{
    stbi s;
    start_mem(&s, cast(ubyte*)buffer.ptr, cast(int)(buffer.length));
    return stbi_load_main(&s, &width, &height, &components, requestedComponents);
}

/// Frees an image loaded by stb_image.
void stbi_image_free(void *retval_from_stbi_load)
{
    free(retval_from_stbi_load);
}

/// Load an image from memory and puts it in a Bitmap.
/// See_also: Bitmap.
/// Throws: STBImageException on error.
deprecated("Use ae.utils.graphics instead")
Bitmap!vec4ub stbiLoadImage(void[] buffer)
{
    int width, height, components;
    ubyte* data = stbi_load_from_memory(buffer, width, height, components, 4);
    scope(exit) stbi_image_free(data);

    if(components != 4)
        throw new STBImageException("Could't convert image to 4 components");

    auto result = Bitmap!vec4ub(vec2i(width, height));

    // Four components were requested, so the decoded buffer holds one vec4ub
    // per pixel. The byte count must include the pixel size, otherwise only
    // the first quarter of the image is copied.
    size_t byteCount = cast(size_t) width * height * vec4ub.sizeof;
    memcpy(result.ptr, data, byteCount);
    return result;
}

/// Load an image from memory and puts it in a ae.utils.graphics.image.Image.
/// Throws: STBImageException on error.
Image!vec4ub stbiLoadImageAE(void[] buffer)
{
    int w, h, channels;
    ubyte* decoded = stbi_load_from_memory(buffer, w, h, channels, 4);
    scope(exit) stbi_image_free(decoded);

    if (channels != 4)
    {
        throw new STBImageException("Could't convert image to 4 components");
    }

    // View the decoded bytes as vec4ub pixels and copy them into the image.
    auto image = Image!vec4ub(w, h);
    size_t byteCount = w * h * vec4ub.sizeof;
    image.pixels[] = cast(vec4ub[])(decoded[0 .. byteCount]);
    return image;
}

//
// Common code used by all image loaders
//

enum : int
{
    SCAN_load   = 0,
    SCAN_type   = 1,
    SCAN_header = 2
};


// Reads one byte and advances the cursor; yields 0 once the buffer is exhausted.
int get8(stbi *s)
{
    if (s.img_buffer >= s.img_buffer_end)
        return 0;

    const ubyte value = *s.img_buffer;
    ++s.img_buffer;
    return value;
}

// Non-zero when the read cursor has reached (or passed) the end of the buffer.
int at_eof(stbi *s)
{
    return (s.img_buffer < s.img_buffer_end) ? 0 : 1;
}

// Same as get8, with the result narrowed to ubyte.
ubyte get8u(stbi *s)
{
    const int value = get8(s);
    return cast(ubyte) value;
}

// Moves the read cursor by n bytes; no bounds check is performed.
void skip(stbi *s, int n)
{
    s.img_buffer = s.img_buffer + n;
}

// Copies n bytes to `buffer` and returns 1 when that many bytes are available;
// otherwise leaves everything untouched and returns 0.
int getn(stbi *s, ubyte *buffer, int n)
{
    const bool available = (s.img_buffer + n <= s.img_buffer_end);
    if (!available)
        return 0;

    memcpy(buffer, s.img_buffer, n);
    s.img_buffer += n;
    return 1;
}

// Big-endian 16-bit read.
int get16(stbi *s)
{
    int high = get8(s);
    int low  = get8(s);
    return (high << 8) + low;
}

// Big-endian 32-bit read.
uint get32(stbi *s)
{
    uint high = get16(s);
    uint low  = get16(s);
    return (high << 16) + low;
}

// Little-endian 16-bit read.
int get16le(stbi *s)
{
    int low  = get8(s);
    int high = get8(s);
    return low + (high << 8);
}

// Little-endian 32-bit read.
uint get32le(stbi *s)
{
    uint low  = get16le(s);
    uint high = get16le(s);
    return low + (high << 16);
}

//
//  generic converter from built-in img_n to req_comp
//    individual types do this automatically as much as possible (e.g. jpeg
//    does all cases internally since it needs to colorspace convert anyway,
//    and it never has alpha, so very few cases ).
//    png can automatically
//    interleave an alpha=255 channel, but falls back to this for other cases
//
//  assume data buffer is malloced, so malloc a new one and free that one
//  only failure mode is malloc failing

// Weighted sum of the three channels (77, 150, 29), scaled back down by 256.
ubyte compute_y(int r, int g, int b)
{
    const int weighted = (r * 77) + (g * 150) + (29 * b);
    return cast(ubyte) (weighted >> 8);
}

// Converts an x-by-y image with img_n components per pixel into one with
// req_comp components per pixel.
// When the counts already match, `data` is returned as is. Otherwise a new
// buffer is malloc'ed, filled, and returned, and `data` is freed.
// Throws: STBImageException if the new buffer cannot be allocated (`data` is
// freed in that case too).
ubyte *convert_format(ubyte *data, int img_n, int req_comp, uint x, uint y)
{
    if (req_comp == img_n)
        return data;
    assert(req_comp >= 1 && req_comp <= 4);

    ubyte *converted = cast(ubyte*) malloc(req_comp * x * y);
    if (converted is null)
    {
        free(data);
        throw new STBImageException("Out of memory");
    }

    for (int row = 0; row < cast(int) y; ++row)
    {
        ubyte *src  = data      + row * x * img_n;
        ubyte *dest = converted + row * x * req_comp;

        // The conversion kind is selected once per scanline rather than once
        // per pixel; each case then walks the x pixels of the row.
        switch (img_n * 8 + req_comp)
        {
            case 1 * 8 + 2:
                for (int col = x - 1; col >= 0; --col)
                {
                    dest[0] = src[0];
                    dest[1] = 255;
                    src  += 1;
                    dest += 2;
                }
                break;

            case 1 * 8 + 3:
                for (int col = x - 1; col >= 0; --col)
                {
                    dest[0] = dest[1] = dest[2] = src[0];
                    src  += 1;
                    dest += 3;
                }
                break;

            case 1 * 8 + 4:
                for (int col = x - 1; col >= 0; --col)
                {
                    dest[0] = dest[1] = dest[2] = src[0];
                    dest[3] = 255;
                    src  += 1;
                    dest += 4;
                }
                break;

            case 2 * 8 + 1:
                for (int col = x - 1; col >= 0; --col)
                {
                    dest[0] = src[0];
                    src  += 2;
                    dest += 1;
                }
                break;

            case 2 * 8 + 3:
                for (int col = x - 1; col >= 0; --col)
                {
                    dest[0] = dest[1] = dest[2] = src[0];
                    src  += 2;
                    dest += 3;
                }
                break;

            case 2 * 8 + 4:
                for (int col = x - 1; col >= 0; --col)
                {
                    dest[0] = dest[1] = dest[2] = src[0];
                    dest[3] = src[1];
                    src  += 2;
                    dest += 4;
                }
                break;

            case 3 * 8 + 4:
                for (int col = x - 1; col >= 0; --col)
                {
                    dest[0] = src[0];
                    dest[1] = src[1];
                    dest[2] = src[2];
                    dest[3] = 255;
                    src  += 3;
                    dest += 4;
                }
                break;

            case 3 * 8 + 1:
                for (int col = x - 1; col >= 0; --col)
                {
                    dest[0] = compute_y(src[0], src[1], src[2]);
                    src  += 3;
                    dest += 1;
                }
                break;

            case 3 * 8 + 2:
                for (int col = x - 1; col >= 0; --col)
                {
                    dest[0] = compute_y(src[0], src[1], src[2]);
                    dest[1] = 255;
                    src  += 3;
                    dest += 2;
                }
                break;

            case 4 * 8 + 1:
                for (int col = x - 1; col >= 0; --col)
                {
                    dest[0] = compute_y(src[0], src[1], src[2]);
                    src  += 4;
                    dest += 1;
                }
                break;

            case 4 * 8 + 2:
                for (int col = x - 1; col >= 0; --col)
                {
                    dest[0] = compute_y(src[0], src[1], src[2]);
                    dest[1] = src[3];
                    src  += 4;
                    dest += 2;
                }
                break;

            case 4 * 8 + 3:
                for (int col = x - 1; col >= 0; --col)
                {
                    dest[0] = src[0];
                    dest[1] = src[1];
                    dest[2] = src[2];
                    src  += 4;
                    dest += 3;
                }
                break;

            default:
                assert(0);
        }
    }

    free(data);
    return converted;
}

//
//  "baseline" JPEG/JFIF decoder (not actually fully baseline implementation)
//
//    simple implementation
//      - channel subsampling of at most 2 in each dimension
//      - doesn't support delayed output of y-dimension
//      - simple interface (only one output format: 8-bit interleaved RGB)
//      - doesn't try to recover corrupt jpegs
//      - doesn't allow partial loading, loading multiple at once
//      - still fast on x86 (copying globals into locals doesn't help x86)
//      - allocates lots of intermediate memory (full size of all components)
//        - non-interleaved case requires this anyway
//        - allows good upsampling (see next)
//    high-quality
//      - upsampled channels are bilinearly interpolated, even across blocks
//      - quality integer IDCT derived from IJG's 'slow'
//    performance
//      - fast huffman; reasonable integer IDCT
//      - uses a lot of intermediate memory, could cache poorly
//      - load http://nothings.org/remote/anemones.jpg 3 times on 2.8Ghz P4
//          stb_jpeg:   1.34 seconds (MSVC6, default release build)
//          stb_jpeg:   1.06 seconds (MSVC6, processor = Pentium Pro)
//          IJL11.dll:  1.08 seconds (compiled by intel)
//          IJG 1998:   0.98 seconds (MSVC6, makefile provided by IJG)
//          IJG 1998:   0.95 seconds (MSVC6, makefile + proc=PPro)

// huffman decoding acceleration
enum FAST_BITS = 9;  // larger handles more cases; smaller stomps less cache

struct huffman
{
    ubyte[1 << FAST_BITS] fast;
    // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
    ushort[256] code;
    ubyte[256] values;
    ubyte[257] size;
    uint[18] maxcode;
    int[17] delta;   // old 'firstsymbol' - old 'firstcode'
}

struct jpeg
{
    stbi *s;
    huffman[4] huff_dc;
    huffman[4] huff_ac;
    ubyte[64][4] dequant;

    // sizes for components, interleaved MCUs
    int img_h_max, img_v_max;
    int img_mcu_x, img_mcu_y;
    int img_mcu_w, img_mcu_h;

    // definition of jpeg image component
    struct img_comp_
    {
        int id;
        int h,v;
        int tq;
        int hd,ha;
        int dc_pred;

        int x,y,w2,h2;
        ubyte *data;
        void *raw_data;
        ubyte *linebuf;
    }

    img_comp_[4] img_comp;

    uint code_buffer; // jpeg entropy-coded buffer
    int code_bits;    // number of valid bits
    ubyte marker;     // marker seen while filling entropy buffer
    int nomore;       // flag if we saw a marker so must stop

    int scan_n;
    int[4] order;
    int restart_interval, todo;
}


// Fills in the decoding tables of h from `count`, which holds 16 entries:
// count[i] is the number of codes of length i+1.
// Returns: always 1.
// Throws: STBImageException if the counts describe more than 256 symbols or
// an impossible set of code lengths.
int build_huffman(huffman *h, int *count)
{
    int i,j,k=0,code;
    // build size list for each symbol (from JPEG spec)
    for (i=0; i < 16; ++i)
    {
        for (j=0; j < count[i]; ++j)
        {
            // code/values hold 256 entries and size holds 257 (the extra one
            // is the terminator written below); a corrupt table can ask for
            // far more, so refuse before writing out of bounds.
            if (k >= 256)
                throw new STBImageException("Bad size list, corrupt JPEG");
            h.size[k++] = cast(ubyte) (i+1);
        }
    }
    h.size[k] = 0;

    // compute actual symbols (from jpeg spec)
    code = 0;
    k = 0;
    for(j=1; j <= 16; ++j) {
        // compute delta to add to code to compute symbol id
        h.delta[j] = k - code;
        if (h.size[k] == j) {
            while (h.size[k] == j)
                h.code[k++] = cast(ushort) (code++);
            if (code-1 >= (1 << j))
                throw new STBImageException("Bad code lengths, corrupt JPEG");
        }
        // compute largest code + 1 for this size, preshifted as needed later
        h.maxcode[j] = code << (16-j);
        code <<= 1;
    }
    // j is 17 here: sentinel that stops the search loop in decode()
    h.maxcode[j] = 0xffffffff;

    // build non-spec acceleration table; 255 is flag for not-accelerated
    memset(h.fast.ptr, 255, 1 << FAST_BITS);
    for (i=0; i < k; ++i) {
        int s = h.size[i];
        if (s <= FAST_BITS) {
            int c = h.code[i] << (FAST_BITS-s);
            int m = 1 << (FAST_BITS-s);
            for (j=0; j < m; ++j) {
                h.fast[c+j] = cast(ubyte) i;
            }
        }
    }
    return 1;
}

// Tops up the bit buffer one byte at a time until it holds more than 24 bits.
// A 0xff byte followed by a non-zero byte is recorded in j.marker and stops
// further reading (j.nomore); zero bytes are fed in from then on.
void grow_buffer_unsafe(jpeg *j)
{
    do {
        int b = j.nomore ? 0 : get8(j.s);
        if (b == 0xff) {
            int c = get8(j.s);
            if (c != 0) {
                j.marker = cast(ubyte) c;
                j.nomore = 1;
                return;
            }
        }
        j.code_buffer |= b << (24 - j.code_bits);
        j.code_bits += 8;
    } while (j.code_bits <= 24);
}

// (1 << n) - 1
static immutable uint[17] bmask=[0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535];

// decode a jpeg huffman value from the bitstream
// Returns the decoded symbol, or -1 when no code matches or too few bits remain.
int decode(jpeg *j, huffman *h)
{
    uint temp;
    int c,k;

    if (j.code_bits < 16) grow_buffer_unsafe(j);

    // look at the top FAST_BITS and determine what symbol ID it is,
    // if the code is <= FAST_BITS
    c = (j.code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
    k = h.fast[c];
    if (k < 255) {
        int s = h.size[k];
        if (s > j.code_bits)
            return -1;
        j.code_buffer <<= s;
        j.code_bits -= s;
        return h.values[k];
    }

    // naive test is to shift the code_buffer down so k bits are
    // valid, then test against maxcode. To speed this up, we've
    // preshifted maxcode left so that it has (16-k) 0s at the
    // end; in other words, regardless of the number of bits, it
    // wants to be compared against something shifted to have 16;
    // that way we don't need to shift inside the loop.
    temp = j.code_buffer >> 16;
    for (k=FAST_BITS+1 ; ; ++k)
        if (temp < h.maxcode[k])
            break;
    if (k == 17) {
        // error! code not found
        j.code_bits -= 16;
        return -1;
    }

    if (k > j.code_bits)
        return -1;

    // convert the huffman code to the symbol id
    c = ((j.code_buffer >> (32 - k)) & bmask[k]) + h.delta[k];
    assert((((j.code_buffer) >> (32 - h.size[c])) & bmask[h.size[c]]) == h.code[c]);

    // convert the id to a symbol
    j.code_bits -= k;
    j.code_buffer <<= k;
    return h.values[c];
}

// combined JPEG 'receive' and JPEG 'extend', since baseline
// always extends everything it receives.
// Throws: STBImageException if n is outside 1..16. The bit count comes from
// the decoded stream, and both the shifts and the bmask lookup below are only
// defined for that range.
int extend_receive(jpeg *j, int n)
{
    if (n < 1 || n > 16)
        throw new STBImageException("Bad huffman code, corrupt JPEG");

    uint m = 1 << (n-1);
    uint k;
    if (j.code_bits < n) grow_buffer_unsafe(j);

    k = stbi_lrot(j.code_buffer, n);
    j.code_buffer = k & ~bmask[n];
    k &= bmask[n];
    j.code_bits -= n;

    // the following test is probably a random branch that won't
    // predict well. I tried to table accelerate it but failed.
    // maybe it's compiling as a conditional move?
    if (k < m)
        return (-1 << n) + k + 1;
    else
        return k;
}

// given a value that's at position X in the zigzag stream,
// where does it appear in the 8x8 matrix coded as row-major?
static immutable ubyte[64+15] dezigzag =
[
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63,
    // let corrupt input sample past end
    63, 63, 63, 63, 63, 63, 63, 63,
    63, 63, 63, 63, 63, 63, 63
];

// decode one 64-entry block--
// `data` is taken by reference: static arrays are value types in D, so a
// by-value parameter would receive a private copy and the decoded
// coefficients would never reach the caller.
// Returns: always 1.
// Throws: STBImageException on an undecodable huffman code.
int decode_block(jpeg *j, ref short[64] data, huffman *hdc, huffman *hac, int b)
{
    int diff,dc,k;
    int t = decode(j, hdc);
    if (t < 0)
        throw new STBImageException("Bad huffman code, corrupt JPEG");
    // t is a bit count taken from the table's symbol values, which may be any
    // byte in a corrupt file; extend_receive only works for small counts.
    if (t > 15)
        throw new STBImageException("Bad huffman code, corrupt JPEG");

    // 0 all the ac values now so we can do it 32-bits at a time
    memset(data.ptr,0,64*(data[0]).sizeof);

    diff = t ? extend_receive(j, t) : 0;
    dc = j.img_comp[b].dc_pred + diff;
    j.img_comp[b].dc_pred = dc;
    data[0] = cast(short) dc;

    // decode AC components, see JPEG spec
    k = 1;
    do {
        int r,s;
        int rs = decode(j, hac);
        if (rs < 0)
            throw new STBImageException("Bad huffman code, corrupt JPEG");
        s = rs & 15;
        r = rs >> 4;
        if (s == 0) {
            if (rs != 0xf0) break; // end block
            k += 16;
        } else {
            k += r;
            // decode into unzigzag'd location
            data[dezigzag[k++]] = cast(short) extend_receive(j,s);
        }
    } while (k < 64);
    return 1;
}

// take a -128..127 value and clamp it and convert to 0..255
ubyte clamp(int x)
{
    // trick to use a single test to catch both cases
    if (cast(uint) x > 255) {
        if (x < 0) return 0;
        if (x > 255) return 255;
    }
    return cast(ubyte) x;
}

// Converts a real constant to fixed point with 12 fractional bits (rounded).
int f2f(double x)
{
    return cast(int)(x * 4096 + 0.5);
}

// Scales an integer to the same 12-bit fixed-point format.
int fsh(int x)
{
    return x << 12;
}

// derived from jidctint -- DCT_ISLOW
// One 8-point pass over inputs s0..s7. The results are left in t0..t3 and
// x0..x3 for the caller to combine as x+t and x-t pairs.
void IDCT_1D(int s0, int s1, int s2, int s3, int s4, int s5, int s6, int s7,
             out int t0, out int t1, out int t2, out int t3,
             out int x0, out int x1, out int x2, out int x3)
{
    int p1,p2,p3,p4,p5;
    p2 = s2;
    p3 = s6;
    p1 = (p2+p3) * f2f(0.5411961f);
    t2 = p1 + p3*f2f(-1.847759065f);
    t3 = p1 + p2*f2f( 0.765366865f);
    p2 = s0;
    p3 = s4;
    t0 = fsh(p2+p3);
    t1 = fsh(p2-p3);
    x0 = t0+t3;
    x3 = t0-t3;
    x1 = t1+t2;
    x2 = t1-t2;
    t0 = s7;
    t1 = s5;
    t2 = s3;
    t3 = s1;
    p3 = t0+t2;
    p4 = t1+t3;
    p1 = t0+t3;
    p2 = t1+t2;
    p5 = (p3+p4)*f2f( 1.175875602f);
    t0 = t0*f2f( 0.298631336f);
    t1 = t1*f2f( 2.053119869f);
    t2 = t2*f2f( 3.072711026f);
    t3 = t3*f2f( 1.501321110f);
    p1 = p5 + p1*f2f(-0.899976223f);
    p2 = p5 + p2*f2f(-2.562915447f);
    p3 = p3*f2f(-1.961570560f);
    p4 = p4*f2f(-0.390180644f);
    t3 += p1+p4;
    t2 += p2+p3;
    t1 += p2+p4;
    t0 += p1+p3;
}

alias stbi_dequantize_t = ubyte;

// .344 seconds on 3*anemones.jpg
// Dequantizes the 64 coefficients in `data` with the table `dequantize`, runs
// the column pass then the row pass, and writes 8 rows of 8 clamped bytes to
// out_, advancing out_stride bytes between rows.
void idct_block(ubyte *out_, int out_stride, short[64] data, stbi_dequantize_t *dequantize)
{
    int i;
    int[64] val;
    int*v = val.ptr;
    stbi_dequantize_t *dq = dequantize;
    ubyte *o;
    short *d = data.ptr;

    // columns
    for (i=0; i < 8; ++i,++d,++dq, ++v) {
        // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
        if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
            && d[40]==0 && d[48]==0 && d[56]==0) {
            //    no shortcut                 0     seconds
            //    (1|2|3|4|5|6|7)==0          0     seconds
            //    all separate               -0.047 seconds
            //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
            int dcterm = d[0] * dq[0] << 2;
            v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
        } else {
            int t0, t1, t2, t3, x0, x1, x2, x3;
            IDCT_1D(d[ 0]*dq[ 0],d[ 8]*dq[ 8],d[16]*dq[16],d[24]*dq[24],
                    d[32]*dq[32],d[40]*dq[40],d[48]*dq[48],d[56]*dq[56],
                    t0, t1, t2, t3, x0, x1, x2, x3);
            // constants scaled things up by 1<<12; let's bring them back
            // down, but keep 2 extra bits of precision
            x0 += 512; x1 += 512; x2 += 512; x3 += 512;
            v[ 0] = (x0+t3) >> 10;
            v[56] = (x0-t3) >> 10;
            v[ 8] = (x1+t2) >> 10;
            v[48] = (x1-t2) >> 10;
            v[16] = (x2+t1) >> 10;
            v[40] = (x2-t1) >> 10;
            v[24] = (x3+t0) >> 10;
            v[32] = (x3-t0) >> 10;
        }
    }

    for (i=0, v=val.ptr, o=out_; i < 8; ++i,v+=8,o+=out_stride) {

        // no fast case since the first 1D IDCT spread components out
        int t0, t1, t2, t3, x0, x1, x2, x3;
        IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7], t0, t1, t2, t3, x0, x1, x2, x3);
        // constants scaled things up by 1<<12, plus we had 1<<2 from first
        // loop, plus horizontal and vertical each scale by sqrt(8) so together
        // we've got an extra 1<<3, so 1<<17 total we need to remove.
        // so we want to round that, which means adding 0.5 * 1<<17,
        // aka 65536. Also, we'll end up with -128 to 127 that we want
        // to encode as 0..255 by adding 128, so we'll add that before the shift
        x0 += 65536 + (128<<17);
        x1 += 65536 + (128<<17);
        x2 += 65536 + (128<<17);
        x3 += 65536 + (128<<17);
        // tried computing the shifts into temps, or'ing the temps to see
        // if any were out of range, but that was slower
        o[0] = clamp((x0+t3) >> 17);
        o[7] = clamp((x0-t3) >> 17);
        o[1] = clamp((x1+t2) >> 17);
        o[6] = clamp((x1-t2) >> 17);
        o[2] = clamp((x2+t1) >> 17);
        o[5] = clamp((x2-t1) >> 17);
        o[3] = clamp((x3+t0) >> 17);
        o[4] = clamp((x3-t0) >> 17);
    }
}


enum MARKER_none = 0xff;

// if there's a pending marker from the entropy stream, return that
// otherwise, fetch from the stream and get a marker.
// if there's no
// marker, return 0xff, which is never a valid marker value
ubyte get_marker(jpeg *j)
{
    ubyte x;
    if (j.marker != MARKER_none) { x = j.marker; j.marker = MARKER_none; return x; }
    x = get8u(j.s);
    if (x != 0xff) return MARKER_none;
    while (x == 0xff)
        x = get8u(j.s);
    return x;
}

// in each scan, we'll have scan_n components, and the order
// of the components is specified by order[]

// True for the restart markers 0xd0..0xd7.
bool RESTART(int x)
{
    return (x >= 0xd0) && (x <= 0xd7);
}

// after a restart interval, reset the entropy decoder and
// the dc prediction
void reset(jpeg *j)
{
    j.code_bits = 0;
    j.code_buffer = 0;
    j.nomore = 0;
    j.img_comp[0].dc_pred = j.img_comp[1].dc_pred = j.img_comp[2].dc_pred = 0;
    j.marker = MARKER_none;
    j.todo = j.restart_interval ? j.restart_interval : 0x7fffffff;
    // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
    // since we don't even allow 1<<30 pixels
}

// Decodes the entropy-coded data of one scan, writing each decoded 8x8 block
// into the component buffers through idct_block.
// Returns 1 (also when stopping early because an expected restart marker is
// missing); 0 only if decode_block reports failure.
int parse_entropy_coded_data(jpeg *z)
{
    reset(z);
    if (z.scan_n == 1) {
        int i,j;
        short[64] data;
        int n = z.order[0];
        // non-interleaved data, we just need to process one block at a time,
        // in trivial scanline order
        // number of blocks to do just depends on how many actual "pixels" this
        // component has, independent of interleaved MCU blocking and such
        int w = (z.img_comp[n].x+7) >> 3;
        int h = (z.img_comp[n].y+7) >> 3;
        for (j=0; j < h; ++j) {
            for (i=0; i < w; ++i) {
                if (!decode_block(z, data, z.huff_dc.ptr+z.img_comp[n].hd, z.huff_ac.ptr+z.img_comp[n].ha, n)) return 0;
                idct_block(z.img_comp[n].data+z.img_comp[n].w2*j*8+i*8, z.img_comp[n].w2, data, z.dequant[z.img_comp[n].tq].ptr);
                // every data block is an MCU, so countdown the restart interval
                if (--z.todo <= 0) {
                    if (z.code_bits < 24) grow_buffer_unsafe(z);
                    // if it's NOT a restart, then just bail, so we get corrupt data
                    // rather than no data
                    if (!RESTART(z.marker)) return 1;
                    reset(z);
                }
            }
        }
    } else { // interleaved!
        int i,j,k,x,y;
        short[64] data;
        for (j=0; j < z.img_mcu_y; ++j) {
            for (i=0; i < z.img_mcu_x; ++i) {
                // scan an interleaved mcu... process scan_n components in order
                for (k=0; k < z.scan_n; ++k) {
                    int n = z.order[k];
                    // scan out an mcu's worth of this component; that's just determined
                    // by the basic H and V specified for the component
                    for (y=0; y < z.img_comp[n].v; ++y) {
                        for (x=0; x < z.img_comp[n].h; ++x) {
                            int x2 = (i*z.img_comp[n].h + x)*8;
                            int y2 = (j*z.img_comp[n].v + y)*8;
                            if (!decode_block(z, data, z.huff_dc.ptr+z.img_comp[n].hd, z.huff_ac.ptr+z.img_comp[n].ha, n)) return 0;
                            idct_block(z.img_comp[n].data+z.img_comp[n].w2*y2+x2, z.img_comp[n].w2, data, z.dequant[z.img_comp[n].tq].ptr);
                        }
                    }
                }
                // after all interleaved components, that's an interleaved MCU,
                // so now count down the restart interval
                if (--z.todo <= 0) {
                    if (z.code_bits < 24) grow_buffer_unsafe(z);
                    // if it's NOT a restart, then just bail, so we get corrupt data
                    // rather than no data
                    if (!RESTART(z.marker)) return 1;
                    reset(z);
                }
            }
        }
    }
    return 1;
}

// Handles the segment introduced by marker m: DRI, DQT, DHT, and the APPn/COM
// segments, which are skipped.
// Returns: 1 when the segment was consumed, 0 for an unhandled marker or a
//          DQT/DHT segment whose declared length does not match its contents.
// Throws: STBImageException on malformed or unsupported segments.
int process_marker(jpeg *z, int m)
{
    int L;
    switch (m) {

        case MARKER_none: // no marker found
            throw new STBImageException("Expected marker, corrupt JPEG");

        case 0xC2: // SOF - progressive
            throw new STBImageException("JPEG format not supported (progressive)");

        case 0xDD: // DRI - specify restart interval
            if (get16(z.s) != 4)
                throw new STBImageException("Bad DRI len, corrupt JPEG");
            z.restart_interval = get16(z.s);
            return 1;

        case 0xDB: // DQT - define quantization table
            L = get16(z.s)-2;
            while (L > 0) {
                int q = get8(z.s);
                int p = q >> 4;
                int t = q & 15,i;
                if (p != 0)
                    throw new STBImageException("Bad DQT type, corrupt JPEG");
                if (t > 3)
                    throw new STBImageException("Bad DQT table, corrupt JPEG");
                for (i=0; i < 64; ++i)
                    z.dequant[t][dezigzag[i]] = get8u(z.s);
                L -= 65;
            }
            return L==0;

        case 0xC4: // DHT - define huffman table
            L = get16(z.s)-2;
            while (L > 0) {
                ubyte *v;
                int[16] sizes;
                int i;
                int m_ = 0;
                int q = get8(z.s);
                int tc = q >> 4;
                int th = q & 15;
                if (tc > 1 || th > 3)
                    throw new STBImageException("Bad DHT header, corrupt JPEG");
                for (i=0; i < 16; ++i) {
                    sizes[i] = get8(z.s);
                    m_ += sizes[i];
                }
                // m_ symbol bytes are stored through v below, and the values
                // table has only 256 slots; sixteen count bytes can add up to
                // far more than that in a corrupt file.
                if (m_ > 256)
                    throw new STBImageException("Bad DHT header, corrupt JPEG");
                L -= 17;
                if (tc == 0) {
                    if (!build_huffman(z.huff_dc.ptr+th, sizes.ptr)) return 0;
                    v = z.huff_dc[th].values.ptr;
                } else {
                    if (!build_huffman(z.huff_ac.ptr+th, sizes.ptr)) return 0;
                    v = z.huff_ac[th].values.ptr;
                }
                for (i=0; i < m_; ++i)
                    v[i] = get8u(z.s);
                L -= m_;
            }
            return L==0;

        default:
            break;
    }
    // check for comment block or APP blocks
    if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
        skip(z.s, get16(z.s)-2);
        return 1;
    }
    return 0;
}

// after we see SOS
// Reads the scan header: the component count, then for each scan component
// its huffman table selectors (stored in hd/ha) and its index (stored in order[]).
// Returns 1 on success, 0 when a component id matches no frame component.
int process_scan_header(jpeg *z)
{
    int i;
    int Ls = get16(z.s);
    z.scan_n = get8(z.s);
    if (z.scan_n < 1 || z.scan_n > 4 || z.scan_n > cast(int) z.s.img_n)
        throw new STBImageException("Bad SOS component count, Corrupt JPEG");

    if (Ls != 6+2*z.scan_n)
        throw new STBImageException("Bad SOS length, Corrupt JPEG");

    for (i=0; i < z.scan_n; ++i) {
        int id = get8(z.s), which;
        int q = get8(z.s);
        for (which = 0; which < z.s.img_n; ++which)
            if (z.img_comp[which].id == id)
                break;
        if (which == z.s.img_n) return 0;
        z.img_comp[which].hd = q >> 4;
        if (z.img_comp[which].hd > 3)
            throw new STBImageException("Bad DC huff, Corrupt JPEG");
        z.img_comp[which].ha = q & 15;
        if (z.img_comp[which].ha > 3)
            throw new STBImageException("Bad AC huff, Corrupt JPEG");
        z.order[i] = which;
    }
    if (get8(z.s) != 0)
        throw new STBImageException("Bad SOS, Corrupt JPEG");
    get8(z.s); // should be 63, but might be 0
    if (get8(z.s) != 0)
        throw new STBImageException("Bad SOS, Corrupt JPEG");

    return 1;
}

// Reads the frame header: sample precision, image size, component count and
// the per-component sampling factors and quantization table index.
// When scan == SCAN_load it also computes the MCU layout and allocates one
// buffer per component; for any other scan value it returns right after
// validating the header.
// Returns: always 1.
// Throws: STBImageException on malformed, unsupported or oversized input, or
// when an allocation fails.
int process_frame_header(jpeg *z, int scan)
{
    stbi *s = z.s;
    int Lf,p,i,q, h_max=1,v_max=1,c;
    Lf = get16(s);         if (Lf < 11) throw new STBImageException("Bad SOF len, Corrupt JPEG");
    p  = get8(s);          if (p != 8) throw new STBImageException("JPEG format not supported: 8-bit only"); // JPEG baseline
    s.img_y = get16(s);    if (s.img_y == 0) throw new STBImageException("No header height, JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
    s.img_x = get16(s);    if (s.img_x == 0) throw new STBImageException("0 width, corrupt JPEG"); // JPEG requires
    c = get8(s);
    if (c != 3 && c != 1) throw new STBImageException("Bad component count, corrupt JPEG");    // JFIF requires
    s.img_n = c;
    for (i=0; i < c; ++i) {
        z.img_comp[i].data = null;
        z.img_comp[i].linebuf = null;
    }

    if (Lf != 8+3*s.img_n) throw new STBImageException("Bad SOF len, corrupt JPEG");

    for (i=0; i < s.img_n; ++i) {
        z.img_comp[i].id = get8(s);
        if (z.img_comp[i].id != i+1)   // JFIF requires
            if (z.img_comp[i].id != i)  // some version of jpegtran outputs non-JFIF-compliant files!
                throw new STBImageException("Bad component ID, corrupt JPEG");
        q = get8(s);
        z.img_comp[i].h = (q >> 4);  if (!z.img_comp[i].h || z.img_comp[i].h > 4) throw new STBImageException("Bad H, corrupt JPEG");
        z.img_comp[i].v = q & 15;    if (!z.img_comp[i].v || z.img_comp[i].v > 4) throw new STBImageException("Bad V, corrupt JPEG");
        z.img_comp[i].tq = get8(s);  if (z.img_comp[i].tq > 3) throw new STBImageException("Bad TQ, corrupt JPEG");
    }

    if (scan != SCAN_load) return 1;

    if ((1 << 30) / s.img_x / s.img_n < s.img_y) throw new STBImageException("Image too large to decode");

    for (i=0; i < s.img_n; ++i) {
        if (z.img_comp[i].h > h_max) h_max = z.img_comp[i].h;
        if (z.img_comp[i].v > v_max) v_max = z.img_comp[i].v;
    }

    // compute interleaved mcu info
    z.img_h_max = h_max;
    z.img_v_max = v_max;
    z.img_mcu_w = h_max * 8;
    z.img_mcu_h = v_max * 8;
    z.img_mcu_x = (s.img_x + z.img_mcu_w-1) / z.img_mcu_w;
    z.img_mcu_y = (s.img_y + z.img_mcu_h-1) / z.img_mcu_h;

    for (i=0; i < s.img_n; ++i) {
        // number of effective pixels (e.g. for non-interleaved MCU)
        z.img_comp[i].x = (s.img_x * z.img_comp[i].h + h_max-1) / h_max;
        z.img_comp[i].y = (s.img_y * z.img_comp[i].v + v_max-1) / v_max;
        // to simplify generation, we'll allocate enough memory to decode
        // the bogus oversized data from using interleaved MCUs and their
        // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
        // discard the extra data until colorspace conversion
        z.img_comp[i].w2 = z.img_mcu_x * z.img_comp[i].h * 8;
        z.img_comp[i].h2 = z.img_mcu_y * z.img_comp[i].v * 8;
        z.img_comp[i].raw_data = malloc(z.img_comp[i].w2 * z.img_comp[i].h2+15);
        if (z.img_comp[i].raw_data == null) {
            // release the buffers of the components allocated so far
            for(--i; i >= 0; --i) {
                free(z.img_comp[i].raw_data);
                z.img_comp[i].data = null;
            }
            throw new STBImageException("Out of memory");
        }
        // align blocks for installable-idct using mmx/sse
        z.img_comp[i].data = cast(ubyte*) (( cast(size_t) z.img_comp[i].raw_data + 15) & ~15);
        z.img_comp[i].linebuf = null;
    }

    return 1;
}

// use comparisons since in some cases we handle more than one case (e.g. SOF)
bool DNL(int x) { return x == 0xdc; }
bool SOI(int x) { return x == 0xd8; }
bool EOI(int x) { return x == 0xd9; }
bool SOF(int x) { return x == 0xc0 || x == 0xc1; }
bool SOS(int x) { return x == 0xda; }

// Checks for the SOI marker and, unless scan == SCAN_type, processes every
// segment up to and including the frame header.
// Returns 1 on success, 0 when process_marker or process_frame_header returns 0.
int decode_jpeg_header(jpeg *z, int scan)
{
    int m;
    z.marker = MARKER_none; // initialize cached marker to empty
    m = get_marker(z);
    if (!SOI(m)) throw new STBImageException("No SOI, corrupt JPEG");
    if (scan == SCAN_type) return 1;
    m = get_marker(z);
    while (!SOF(m))
    {
        if (!process_marker(z,m)) return 0;
        m = get_marker(z);

        while (m == MARKER_none)
        {
            // some files have extra padding after their blocks, so ok, we'll scan
            if (at_eof(z.s)) throw new STBImageException("No SOF, corrupt JPEG");
            m = get_marker(z);
        }
    }
    if (!process_frame_header(z, scan)) return 0;
    return 1;
}

// Decodes the header and then every segment until the EOI marker: scans are
// entropy-decoded, all other segments go through process_marker.
// Returns 1 on success, 0 as soon as any step returns 0.
int decode_jpeg_image(jpeg *j)
{
    int m;
    j.restart_interval = 0;
    if (!decode_jpeg_header(j, SCAN_load)) return 0;
    m = get_marker(j);
    while (!EOI(m)) {
        if (SOS(m)) {
            if (!process_scan_header(j)) return 0;
            if (!parse_entropy_coded_data(j)) return 0;
            if (j.marker == MARKER_none ) {
                // handle 0s at the end of image data from IP Kamera 9060
                while (!at_eof(j.s)) {
                    int x = get8(j.s);
                    if (x == 255) {
                        j.marker = get8u(j.s);
                        break;
                    } else if (x != 0) {
                        return 0;
                    }
                }
                // if we reach eof without hitting a marker, get_marker() below will fail and we'll eventually return 0
            }
        } else {
            if (!process_marker(j, m)) return 0;
        }
        m = get_marker(j);
    }
    return 1;
}

// static jfif-centered resampling (across block boundaries)

alias resample_row_func = ubyte* function(ubyte *out_, ubyte *in0, ubyte *in1, int w, int hs);

// x / 4 by shifting, narrowed to ubyte.
ubyte div4(int x)
{
    return cast(ubyte)(x >> 2);
}

// No resampling: hands back the near input row untouched.
ubyte *resample_row_1(ubyte *out_, ubyte *in_near, ubyte *in_far, int w, int hs)
{
    return in_near;
}

ubyte* resample_row_v_2(ubyte *out_, ubyte *in_near, ubyte *in_far, int w, int hs)
{
    // need to generate two samples vertically for every one in input
    int i;
    for (i=0; i < w; ++i)
        out_[i] = div4(3*in_near[i] + in_far[i] + 2);
    return out_;
}

ubyte* resample_row_h_2(ubyte *out_, ubyte *in_near, ubyte *in_far, int w, int hs)
{
    // need to generate two samples horizontally for every one in input
    int i;
    ubyte *input = in_near;

    if (w == 1) {
        // if only one sample, can't do any interpolation
        out_[0] = out_[1] = input[0];
        return out_;
    }

    out_[0] = input[0];
    out_[1] = div4(input[0]*3 + input[1] + 2);
    for (i=1; i < w-1; ++i) {
        int n = 3*input[i]+2;
        out_[i*2+0] = div4(n+input[i-1]);
        out_[i*2+1] = div4(n+input[i+1]);
    }
    out_[i*2+0] = div4(input[w-2]*3 + input[w-1] + 2);
    out_[i*2+1] = input[w-1];

    return out_;
}

// x / 16 by shifting, narrowed to ubyte.
ubyte div16(int x)
{
    return cast(ubyte)(x >> 4);
}


ubyte
*resample_row_hv_2(ubyte *out_, ubyte *in_near, ubyte *in_far, int w, int hs) 1172 { 1173 // need to generate 2x2 samples for every one in input 1174 int i,t0,t1; 1175 if (w == 1) { 1176 out_[0] = out_[1] = div4(3*in_near[0] + in_far[0] + 2); 1177 return out_; 1178 } 1179 1180 t1 = 3*in_near[0] + in_far[0]; 1181 out_[0] = div4(t1+2); 1182 for (i=1; i < w; ++i) { 1183 t0 = t1; 1184 t1 = 3*in_near[i]+in_far[i]; 1185 out_[i*2-1] = div16(3*t0 + t1 + 8); 1186 out_[i*2 ] = div16(3*t1 + t0 + 8); 1187 } 1188 out_[w*2-1] = div4(t1+2); 1189 1190 return out_; 1191 } 1192 1193 ubyte *resample_row_generic(ubyte *out_, ubyte *in_near, ubyte *in_far, int w, int hs) 1194 { 1195 // resample with nearest-neighbor 1196 int i,j; 1197 in_far = in_far; 1198 for (i=0; i < w; ++i) 1199 for (j=0; j < hs; ++j) 1200 out_[i*hs+j] = in_near[i]; 1201 return out_; 1202 } 1203 1204 int float2fixed(double x) 1205 { 1206 return cast(int)((x) * 65536 + 0.5); 1207 } 1208 1209 // 0.38 seconds on 3*anemones.jpg (0.25 with processor = Pro) 1210 // VC6 without processor=Pro is generating multiple LEAs per multiply! 
/// Converts `count` YCbCr samples to RGB using 16.16 fixed-point maths.
///
/// `step` is the distance in bytes between consecutive output pixels.
/// NOTE: a fourth byte (255) is stored for every pixel even when `step`
/// is 3; load_jpeg_image allocates one spare byte at the end of the
/// output buffer so that the last store stays inside the allocation.
void YCbCr_to_RGB_row(ubyte *out_, const ubyte *y, const ubyte *pcb, const ubyte *pcr, int count, int step)
{
    int i;
    for (i=0; i < count; ++i) {
        int y_fixed = (y[i] << 16) + 32768; // rounding
        int r,g,b;
        int cr = pcr[i] - 128;
        int cb = pcb[i] - 128;
        r = y_fixed + cr*float2fixed(1.40200f);
        g = y_fixed - cr*float2fixed(0.71414f) - cb*float2fixed(0.34414f);
        b = y_fixed + cb*float2fixed(1.77200f);
        r >>= 16;
        g >>= 16;
        b >>= 16;
        // the unsigned compare catches both negative and >255 in one test
        if (cast(uint) r > 255) { if (r < 0) r = 0; else r = 255; }
        if (cast(uint) g > 255) { if (g < 0) g = 0; else g = 255; }
        if (cast(uint) b > 255) { if (b < 0) b = 0; else b = 255; }
        out_[0] = cast(ubyte)r;
        out_[1] = cast(ubyte)g;
        out_[2] = cast(ubyte)b;
        out_[3] = 255;
        out_ += step;
    }
}

/// Releases the temporary per-component buffers of `j`.
/// Pointers are nulled after being freed, so calling this twice is harmless.
void cleanup_jpeg(jpeg *j)
{
    int i;
    for (i=0; i < j.s.img_n; ++i) {
        if (j.img_comp[i].data) {
            // `data` is an aligned pointer into `raw_data`; only the
            // latter came from malloc
            free(j.img_comp[i].raw_data);
            j.img_comp[i].raw_data = null;
            j.img_comp[i].data = null;
        }
        if (j.img_comp[i].linebuf) {
            free(j.img_comp[i].linebuf);
            j.img_comp[i].linebuf = null;
        }
    }
}

/// Per-component upsampling state used by load_jpeg_image.
struct stbi_resample
{
    resample_row_func resample;
    ubyte* line0;
    ubyte* line1;
    int hs,vs;   // expansion factor in each axis
    int w_lores; // horizontal pixels pre-expansion
    int ystep;   // how far through vertical expansion we are
    int ypos;    // which pre-expansion row we're on
} ;

/// Decodes a JPEG and returns a malloc'ed buffer of
/// `img_x * img_y * n` bytes, where `n` is `req_comp` or, when that is 0,
/// the number of components in the file. The caller owns the buffer.
///
/// `*out_x` / `*out_y` receive the dimensions; `*comp` (optional) receives
/// the component count of the file, not of the output.
/// Returns null when decoding reports failure without throwing.
/// Throws STBImageException for a bad `req_comp` or when out of memory.
ubyte *load_jpeg_image(jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
{
    int n, decode_n;
    // validate req_comp
    if (req_comp < 0 || req_comp > 4)
        throw new STBImageException("Internal error: bad req_comp");
    z.s.img_n = 0;

    // Release the component buffers on every way out of this function,
    // including an exception escaping from the decoder. cleanup_jpeg
    // tolerates repeated calls and an img_n of 0.
    scope(exit) cleanup_jpeg(z);

    // load a jpeg image from whichever source
    if (!decode_jpeg_image(z)) return null;

    // determine actual number of components to generate
    n = req_comp ? req_comp : z.s.img_n;

    // grey output from a colour image only needs the luma plane
    if (z.s.img_n == 3 && n < 3)
        decode_n = 1;
    else
        decode_n = z.s.img_n;

    // resample and color-convert
    {
        int k;
        uint i,j;
        ubyte *output;
        ubyte*[4] coutput;

        stbi_resample[4] res_comp;

        for (k=0; k < decode_n; ++k) {
            stbi_resample *r = &res_comp[k];

            // allocate line buffer big enough for upsampling off the edges
            // with upsample factor of 4
            z.img_comp[k].linebuf = cast(ubyte*) malloc(z.s.img_x + 3);
            if (!z.img_comp[k].linebuf)
                throw new STBImageException("Out of memory");

            r.hs      = z.img_h_max / z.img_comp[k].h;
            r.vs      = z.img_v_max / z.img_comp[k].v;
            r.ystep   = r.vs >> 1;
            r.w_lores = (z.s.img_x + r.hs-1) / r.hs;
            r.ypos    = 0;
            r.line0   = r.line1 = z.img_comp[k].data;

            if      (r.hs == 1 && r.vs == 1) r.resample = &resample_row_1;
            else if (r.hs == 1 && r.vs == 2) r.resample = &resample_row_v_2;
            else if (r.hs == 2 && r.vs == 1) r.resample = &resample_row_h_2;
            else if (r.hs == 2 && r.vs == 2) r.resample = &resample_row_hv_2;
            else                             r.resample = &resample_row_generic;
        }

        // nothing below throws, so `output` cannot leak.
        // The extra byte absorbs the unconditional alpha store of
        // YCbCr_to_RGB_row on the very last pixel when n == 3.
        output = cast(ubyte*) malloc(n * z.s.img_x * z.s.img_y + 1);
        if (!output) throw new STBImageException("Out of memory");

        // now go ahead and resample
        for (j=0; j < z.s.img_y; ++j) {
            ubyte *out_ = output + n * z.s.img_x * j;
            for (k=0; k < decode_n; ++k) {
                stbi_resample *r = &res_comp[k];
                int y_bot = r.ystep >= (r.vs >> 1);
                coutput[k] = r.resample(z.img_comp[k].linebuf,
                                        y_bot ? r.line1 : r.line0,
                                        y_bot ? r.line0 : r.line1,
                                        r.w_lores, r.hs);
                if (++r.ystep >= r.vs) {
                    r.ystep = 0;
                    r.line0 = r.line1;
                    if (++r.ypos < z.img_comp[k].y)
                        r.line1 += z.img_comp[k].w2;
                }
            }
            if (n >= 3) {
                ubyte *y = coutput[0];
                if (z.s.img_n == 3) {
                    YCbCr_to_RGB_row(out_, y, coutput[1], coutput[2], z.s.img_x, n);
                } else {
                    for (i=0; i < z.s.img_x; ++i) {
                        out_[0] = out_[1] = out_[2] = y[i];
                        out_[3] = 255; // not used if n==3
                        out_ += n;
                    }
                }
            } else {
                ubyte *y = coutput[0];
                if (n == 1) {
                    for (i=0; i < z.s.img_x; ++i) out_[i] = y[i];
                } else {
                    for (i=0; i < z.s.img_x; ++i) {
                        *out_++ = y[i];
                        *out_++ = 255;
                    }
                }
            }
        }
        *out_x = z.s.img_x;
        *out_y = z.s.img_y;
        if (comp) *comp  = z.s.img_n; // report original components, not output
        return output;
    }
}

/// Decodes a JPEG from `s`; see load_jpeg_image for the contract.
ubyte* stbi_jpeg_load(stbi *s, int *x, int *y, int *comp, int req_comp)
{
    jpeg j;
    j.s = s;
    return load_jpeg_image(&j, x,y,comp,req_comp);
}

/// Throws STBImageException unless `s` starts like a JPEG stream.
void stbi_jpeg_test(stbi *s)
{
    jpeg j;
    j.s = s;
    int r = decode_jpeg_header(&j, SCAN_type);
    if (r == 0)
        throw new STBImageException("Couldn't decode JPEG header");
}


// public domain zlib decode    v0.2  Sean Barrett 2006-11-18
//    simple implementation
//      - all input must be provided in an upfront buffer
//      - all output is written to a single output buffer (can malloc/realloc)
//    performance
//      - fast huffman

// fast-way is faster to check than jpeg huffman, but slow way is slower
enum ZFAST_BITS = 9; // accelerate all cases in default tables
enum ZFAST_MASK = ((1 << ZFAST_BITS) - 1);

// zlib-style huffman encoding
// (jpegs packs from left, zlib from right, so can't share code)
struct zhuffman
{
    ushort[1 << ZFAST_BITS] fast;
    ushort[16] firstcode;
    int[17] maxcode;
    ushort[16] firstsymbol;
    ubyte[288] size;
    ushort[288] value;
} ;

/// Reverses the order of the low 16 bits of `n`.
int bitreverse16(int n)
{
    n = ((n & 0xAAAA) >>  1) | ((n & 0x5555) << 1);
    n = ((n & 0xCCCC) >>  2) | ((n & 0x3333) << 2);
    n = ((n & 0xF0F0) >>  4) | ((n & 0x0F0F) << 4);
    n = ((n & 0xFF00) >>  8) | ((n & 0x00FF) << 8);
    return n;
}

/// Reverses the low `bits` bits of `v` (`bits` must not exceed 16).
int bit_reverse(int v, int bits)
{
    assert(bits <= 16);
    // to bit reverse n bits, reverse 16 and shift
    // e.g. 11 bits, bit reverse and shift away 5
    return bitreverse16(v) >> (16-bits);
}

/// Builds the decoding tables of `z` from `num` code lengths in `sizelist`.
///
/// Returns 1 on success. Throws STBImageException when the lengths do not
/// describe a valid code (length above 15, or more codes of one length
/// than that length can hold).
int zbuild_huffman(zhuffman *z, ubyte *sizelist, int num)
{
    int i,k=0;
    int code;
    int[16] next_code;
    int[17] sizes;

    // DEFLATE spec for generating codes
    memset(sizes.ptr, 0, sizes.sizeof);
    memset(z.fast.ptr, 255, z.fast.sizeof);
    for (i=0; i < num; ++i) {
        // lengths come from untrusted data; anything above 15 would index
        // past next_code/firstcode further down
        if (sizelist[i] > 15)
            throw new STBImageException("Bad codelength, corrupt PNG");
        ++sizes[sizelist[i]];
    }
    sizes[0] = 0;
    // report malformed input as a decode error rather than asserting
    for (i=1; i < 16; ++i)
        if (sizes[i] > (1 << i))
            throw new STBImageException("Bad sizes, corrupt PNG");
    code = 0;
    for (i=1; i < 16; ++i) {
        next_code[i] = code;
        z.firstcode[i] = cast(ushort) code;
        z.firstsymbol[i] = cast(ushort) k;
        code = (code + sizes[i]);
        if (sizes[i])
            if (code-1 >= (1 << i))
                throw new STBImageException("Bad codelength, corrupt PNG");
        z.maxcode[i] = code << (16-i); // preshift for inner loop
        code <<= 1;
        k += sizes[i];
    }
    z.maxcode[16] = 0x10000; // sentinel
    for (i=0; i < num; ++i) {
        int s = sizelist[i];
        if (s) {
            int c = next_code[s] - z.firstcode[s] + z.firstsymbol[s];
            z.size[c] = cast(ubyte)s;
            z.value[c] = cast(ushort)i;
            if (s <= ZFAST_BITS) {
                // short codes are replicated over every fast-table slot
                // whose low `s` bits match the (bit-reversed) code
                int k_ = bit_reverse(next_code[s],s);
                while (k_ < (1 << ZFAST_BITS)) {
                    z.fast[k_] = cast(ushort) c;
                    k_ += (1 << s);
                }
            }
            ++next_code[s];
        }
    }
    return 1;
}

// zlib-from-memory implementation for PNG reading
//    because PNG allows splitting the zlib stream arbitrarily,
//    and it's
// annoying structurally to have PNG call ZLIB call PNG,
//    we require PNG read all the IDATs and combine them into a single
//    memory buffer

/// State of one zlib decode: input cursor, bit buffer, output buffer and
/// the two Huffman tables of the block currently being decoded.
struct zbuf
{
    const(ubyte) *zbuffer;
    const(ubyte) *zbuffer_end;
    int num_bits;
    uint code_buffer;

    ubyte *zout;
    ubyte *zout_start;
    ubyte *zout_end;
    int   z_expandable;

    zhuffman z_length, z_distance;
} ;

/// Returns the next input byte, or 0 once the input is exhausted.
int zget8(zbuf *z)
{
    if (z.zbuffer >= z.zbuffer_end) return 0;
    return *z.zbuffer++;
}

/// Tops up the bit buffer until it holds more than 24 bits.
void fill_bits(zbuf *z)
{
    do {
        assert(z.code_buffer < (1U << z.num_bits));
        z.code_buffer |= zget8(z) << z.num_bits;
        z.num_bits += 8;
    } while (z.num_bits <= 24);
}

/// Consumes and returns the next `n` bits (least significant first).
uint zreceive(zbuf *z, int n)
{
    uint k;
    if (z.num_bits < n) fill_bits(z);
    k = z.code_buffer & ((1 << n) - 1);
    z.code_buffer >>= n;
    z.num_bits -= n;
    return k;
}

/// Decodes one symbol using table `z`.
/// Returns the symbol value, or -1 when the bits do not form a valid code.
int zhuffman_decode(zbuf *a, zhuffman *z)
{
    int b,s,k;
    if (a.num_bits < 16) fill_bits(a);
    b = z.fast[a.code_buffer & ZFAST_MASK];
    if (b < 0xffff) {
        s = z.size[b];
        a.code_buffer >>= s;
        a.num_bits -= s;
        return z.value[b];
    }

    // not resolved by fast table, so compute it the slow way
    // use jpeg approach, which requires MSbits at top
    k = bit_reverse(a.code_buffer, 16);
    for (s=ZFAST_BITS+1; ; ++s)
        if (k < z.maxcode[s])
            break;
    if (s == 16) return -1; // invalid code!
    // code size is s, so:
    b = (k >> (16-s)) - z.firstcode[s] + z.firstsymbol[s];
    // corrupt data can produce an index outside the table or one whose
    // recorded length disagrees; report it instead of asserting
    if (b < 0 || cast(size_t) b >= z.size.length) return -1;
    if (z.size[b] != s) return -1;
    a.code_buffer >>= s;
    a.num_bits -= s;
    return z.value[b];
}

/// Grows the output buffer so that at least `n` more bytes fit.
/// Returns 1 on success. Throws STBImageException when the buffer is not
/// expandable, the required size does not fit in an int, or realloc fails.
int expand(zbuf *z, int n)  // need to make room for n bytes
{
    ubyte *q;
    int cur, limit;
    if (!z.z_expandable)
        throw new STBImageException("Output buffer limit, corrupt PNG");
    cur   = cast(int) (z.zout     - z.zout_start);
    limit = cast(int) (z.zout_end - z.zout_start);
    if (n < 0 || cur > int.max - n)
        throw new STBImageException("Out of memory");
    // doubling 0 never makes progress, so start from at least one byte
    if (limit < 1) limit = 1;
    while (cur + n > limit) {
        if (limit > int.max / 2)
            throw new STBImageException("Out of memory");
        limit *= 2;
    }
    q = cast(ubyte*) realloc(z.zout_start, limit);
    if (q == null)
        throw new STBImageException("Out of memory");
    z.zout_start = q;
    z.zout       = q + cur;
    z.zout_end   = q + limit;
    return 1;
}

// Base value and number of extra bits for each length / distance symbol.
static immutable int[31] length_base = [
    3,4,5,6,7,8,9,10,11,13,
    15,17,19,23,27,31,35,43,51,59,
    67,83,99,115,131,163,195,227,258,0,0 ];

static immutable int[31] length_extra =
    [ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 ];

static immutable int[32] dist_base = [ 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
    257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0];

// the last two entries were implicit in the original initializer
static immutable int[32] dist_extra =
    [ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13,0,0];

/// Decodes one Huffman-coded block into the output buffer.
/// Returns 1 at the end-of-block symbol; throws STBImageException on
/// invalid codes or back-references.
int parse_huffman_block(zbuf *a)
{
    for(;;) {
        int z = zhuffman_decode(a, &a.z_length);
        if (z < 256) {
            if (z < 0)
                throw new STBImageException("Bad Huffman code, corrupt PNG");
            if (a.zout >= a.zout_end) if (!expand(a, 1)) return 0;
            *a.zout++ = cast(ubyte) z;
        } else {
            ubyte *p;
            int len,dist;
            if (z == 256) return 1;
            // symbols 286 and 287 can be encoded but never occur in valid data
            if (z >= 286)
                throw new STBImageException("Bad Huffman code, corrupt PNG");
            z -= 257;
            len = length_base[z];
            if (length_extra[z]) len += zreceive(a, length_extra[z]);
            z = zhuffman_decode(a, &a.z_distance);
            // distance symbols 30 and 31 are invalid too; their table
            // entry is 0, which would copy bytes that were never written
            if (z < 0 || z >= 30)
                throw new STBImageException("Bad Huffman code, corrupt PNG");
            dist = dist_base[z];
            if (dist_extra[z]) dist += zreceive(a, dist_extra[z]);
            if (a.zout - a.zout_start < dist) throw new STBImageException("Bad dist, corrupt PNG");
            if (a.zout + len > a.zout_end) if (!expand(a, len)) return 0;
            p = a.zout - dist;
            // byte-wise on purpose: source and destination may overlap
            while (len--)
                *a.zout++ = *p++;
        }
    }
}

/// Reads the code-length description of a dynamic block and builds
/// `a.z_length` and `a.z_distance` from it.
/// Returns 1 on success; throws STBImageException on malformed data.
int compute_huffman_codes(zbuf *a)
{
    static immutable ubyte[19] length_dezigzag = [ 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 ];
    zhuffman z_codelength;
    ubyte[286+32+137] lencodes;//padding for maximum single op
    ubyte[19] codelength_sizes;
    int i,n;

    int hlit  = zreceive(a,5) + 257;
    int hdist = zreceive(a,5) + 1;
    int hclen = zreceive(a,4) + 4;

    memset(codelength_sizes.ptr, 0, codelength_sizes.sizeof);
    for (i=0; i < hclen; ++i) {
        int s = zreceive(a,3);
        codelength_sizes[length_dezigzag[i]] = cast(ubyte) s;
    }
    if (!zbuild_huffman(&z_codelength, codelength_sizes.ptr, 19)) return 0;

    n = 0;
    while (n < hlit + hdist) {
        int c = zhuffman_decode(a, &z_codelength);
        // -1 means the decoder saw an invalid code
        if (c < 0 || c >= 19)
            throw new STBImageException("Bad codelengths, corrupt PNG");
        if (c < 16)
            lencodes[n++] = cast(ubyte) c;
        else if (c == 16) {
            // "repeat previous length" needs a previous length
            if (n == 0)
                throw new STBImageException("Bad codelengths, corrupt PNG");
            c = zreceive(a,2)+3;
            memset(lencodes.ptr+n, lencodes[n-1], c);
            n += c;
        } else if (c == 17) {
            c = zreceive(a,3)+3;
            memset(lencodes.ptr+n, 0, c);
            n += c;
        } else {
            // c == 18
            c = zreceive(a,7)+11;
            memset(lencodes.ptr+n, 0, c);
            n += c;
        }
    }
    if (n != hlit+hdist) throw new STBImageException("Bad codelengths, corrupt PNG");
    if (!zbuild_huffman(&a.z_length, lencodes.ptr, hlit)) return 0;
    if (!zbuild_huffman(&a.z_distance, lencodes.ptr+hlit, hdist)) return 0;
    return 1;
}

/// Copies one stored (uncompressed) block to the output.
/// Returns 1 on success; throws STBImageException on a bad header or when
/// the block claims more bytes than the input holds.
int parse_uncompressed_block(zbuf *a)
{
    ubyte[4] header;
    int len,nlen,k;
    if (a.num_bits & 7)
        zreceive(a, a.num_bits & 7); // discard
    // drain the bit-packed data into header
    k = 0;
    while (a.num_bits > 0) {
        header[k++] = cast(ubyte) (a.code_buffer & 255); // wtf this warns?
        a.code_buffer >>= 8;
        a.num_bits -= 8;
    }
    assert(a.num_bits == 0);
    // now fill header the normal way
    while (k < 4)
        header[k++] = cast(ubyte) zget8(a);
    len  = header[1] * 256 + header[0];
    nlen = header[3] * 256 + header[2];
    if (nlen != (len ^ 0xffff)) throw new STBImageException("Zlib corrupt, corrupt PNG");
    if (a.zbuffer + len > a.zbuffer_end) throw new STBImageException("Read past buffer, corrupt PNG");
    if (a.zout + len > a.zout_end)
        if (!expand(a, len)) return 0;
    memcpy(a.zout, a.zbuffer, len);
    a.zbuffer += len;
    a.zout += len;
    return 1;
}

/// Validates the two-byte zlib header.
/// Returns 1; throws STBImageException when the header is not acceptable.
int parse_zlib_header(zbuf *a)
{
    int cmf   = zget8(a);
    int cm    = cmf & 15;
    /* int cinfo = cmf >> 4; */
    int flg   = zget8(a);
    if ((cmf*256+flg) % 31 != 0) throw new STBImageException("Bad zlib header, corrupt PNG"); // zlib spec
    if (flg & 32) throw new STBImageException("No preset dict, corrupt PNG"); // preset dictionary not allowed in png
    if (cm != 8) throw new STBImageException("Bad compression, corrupt PNG");  // DEFLATE required for png
    // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
    return 1;
}

// @TODO: should statically initialize these for optimal thread safety
__gshared ubyte[288] default_length;
__gshared ubyte[32] default_distance;

/// Fills the code-length tables used by fixed-Huffman blocks.
void init_defaults()
{
    int i;   // use <= to match clearly with spec
    for (i=0; i <= 143; ++i)     default_length[i]   = 8;
    for (   ; i <= 255; ++i)     default_length[i]   = 9;
    for (   ; i <= 279; ++i)     default_length[i]   = 7;
    for (   ; i <= 287; ++i)     default_length[i]   = 8;

    for (i=0; i <=  31; ++i)     default_distance[i] = 5;
}

__gshared int stbi_png_partial; // a quick hack to only allow decoding some of a PNG...
I should implement real streaming support instead 1708 int parse_zlib(zbuf *a, int parse_header) 1709 { 1710 int final_, type; 1711 if (parse_header) 1712 if (!parse_zlib_header(a)) return 0; 1713 a.num_bits = 0; 1714 a.code_buffer = 0; 1715 do { 1716 final_ = zreceive(a,1); 1717 type = zreceive(a,2); 1718 if (type == 0) { 1719 if (!parse_uncompressed_block(a)) return 0; 1720 } else if (type == 3) { 1721 return 0; 1722 } else { 1723 if (type == 1) { 1724 // use fixed code lengths 1725 if (!default_distance[31]) init_defaults(); 1726 if (!zbuild_huffman(&a.z_length , default_length.ptr , 288)) return 0; 1727 if (!zbuild_huffman(&a.z_distance, default_distance.ptr, 32)) return 0; 1728 } else { 1729 if (!compute_huffman_codes(a)) return 0; 1730 } 1731 if (!parse_huffman_block(a)) return 0; 1732 } 1733 if (stbi_png_partial && a.zout - a.zout_start > 65536) 1734 break; 1735 } while (!final_); 1736 return 1; 1737 } 1738 1739 int do_zlib(zbuf *a, ubyte *obuf, int olen, int exp, int parse_header) 1740 { 1741 a.zout_start = obuf; 1742 a.zout = obuf; 1743 a.zout_end = obuf + olen; 1744 a.z_expandable = exp; 1745 1746 return parse_zlib(a, parse_header); 1747 } 1748 1749 ubyte *stbi_zlib_decode_malloc_guesssize(const(ubyte) *buffer, int len, int initial_size, int *outlen) 1750 { 1751 zbuf a; 1752 ubyte *p = cast(ubyte*) malloc(initial_size); 1753 if (p == null) return null; 1754 a.zbuffer = buffer; 1755 a.zbuffer_end = buffer + len; 1756 if (do_zlib(&a, p, initial_size, 1, 1)) { 1757 if (outlen) *outlen = cast(int) (a.zout - a.zout_start); 1758 return a.zout_start; 1759 } else { 1760 free(a.zout_start); 1761 return null; 1762 } 1763 } 1764 1765 ubyte *stbi_zlib_decode_malloc(const(ubyte) *buffer, int len, int *outlen) 1766 { 1767 return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen); 1768 } 1769 1770 ubyte *stbi_zlib_decode_malloc_guesssize_headerflag(const(ubyte) *buffer, int len, int initial_size, int *outlen, int parse_header) 1771 { 1772 zbuf a; 1773 ubyte 
*p = cast(ubyte*) malloc(initial_size); 1774 if (p == null) return null; 1775 a.zbuffer = buffer; 1776 a.zbuffer_end = buffer + len; 1777 if (do_zlib(&a, p, initial_size, 1, parse_header)) { 1778 if (outlen) *outlen = cast(int) (a.zout - a.zout_start); 1779 return a.zout_start; 1780 } else { 1781 free(a.zout_start); 1782 return null; 1783 } 1784 } 1785 1786 int stbi_zlib_decode_buffer(ubyte* obuffer, int olen, const(ubyte)* ibuffer, int ilen) 1787 { 1788 zbuf a; 1789 a.zbuffer = ibuffer; 1790 a.zbuffer_end = ibuffer + ilen; 1791 if (do_zlib(&a, obuffer, olen, 0, 1)) 1792 return cast(int) (a.zout - a.zout_start); 1793 else 1794 return -1; 1795 } 1796 1797 ubyte *stbi_zlib_decode_noheader_malloc(const(ubyte) *buffer, int len, int *outlen) 1798 { 1799 zbuf a; 1800 ubyte *p = cast(ubyte*) malloc(16384); 1801 if (p == null) return null; 1802 a.zbuffer = buffer; 1803 a.zbuffer_end = buffer+len; 1804 if (do_zlib(&a, p, 16384, 1, 0)) { 1805 if (outlen) *outlen = cast(int) (a.zout - a.zout_start); 1806 return a.zout_start; 1807 } else { 1808 free(a.zout_start); 1809 return null; 1810 } 1811 } 1812 1813 int stbi_zlib_decode_noheader_buffer(ubyte *obuffer, int olen, const(ubyte) *ibuffer, int ilen) 1814 { 1815 zbuf a; 1816 a.zbuffer = ibuffer; 1817 a.zbuffer_end = ibuffer + ilen; 1818 if (do_zlib(&a, obuffer, olen, 0, 0)) 1819 return cast(int) (a.zout - a.zout_start); 1820 else 1821 return -1; 1822 } 1823 1824 // public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18 1825 // simple implementation 1826 // - only 8-bit samples 1827 // - no CRC checking 1828 // - allocates lots of intermediate memory 1829 // - avoids problem of streaming data between subsystems 1830 // - avoids explicit window management 1831 // performance 1832 // - uses stb_zlib, a PD zlib implementation with fast huffman decoding 1833 1834 1835 struct chunk 1836 { 1837 uint length; 1838 uint type; 1839 } 1840 1841 uint PNG_TYPE(ubyte a, ubyte b, ubyte c, ubyte d) 1842 { 1843 return (a << 24) + (b 
<< 16) + (c << 8) + d; 1844 } 1845 1846 chunk get_chunk_header(stbi *s) 1847 { 1848 chunk c; 1849 c.length = get32(s); 1850 c.type = get32(s); 1851 return c; 1852 } 1853 1854 static int check_png_header(stbi *s) 1855 { 1856 static immutable ubyte[8] png_sig = [ 137, 80, 78, 71, 13, 10, 26, 10 ]; 1857 for (int i = 0; i < 8; ++i) 1858 { 1859 ubyte headerByte = get8u(s); 1860 ubyte expected = png_sig[i]; 1861 if (headerByte != expected) 1862 throw new STBImageException("Bad PNG sig, not a PNG"); 1863 } 1864 return 1; 1865 } 1866 1867 struct png 1868 { 1869 stbi *s; 1870 ubyte *idata; 1871 ubyte *expanded; 1872 ubyte *out_; 1873 } 1874 1875 1876 enum : int 1877 { 1878 F_none=0, F_sub=1, F_up=2, F_avg=3, F_paeth=4, 1879 F_avg_first, F_paeth_first 1880 } 1881 1882 static immutable ubyte[5] first_row_filter = 1883 [ 1884 F_none, F_sub, F_none, F_avg_first, F_paeth_first 1885 ]; 1886 1887 static int paeth(int a, int b, int c) 1888 { 1889 int p = a + b - c; 1890 int pa = abs(p-a); 1891 int pb = abs(p-b); 1892 int pc = abs(p-c); 1893 if (pa <= pb && pa <= pc) return a; 1894 if (pb <= pc) return b; 1895 return c; 1896 } 1897 1898 // create the png data from post-deflated data 1899 static int create_png_image_raw(png *a, ubyte *raw, uint raw_len, int out_n, uint x, uint y) 1900 { 1901 stbi *s = a.s; 1902 uint i,j,stride = x*out_n; 1903 int k; 1904 int img_n = s.img_n; // copy it into a local for later 1905 assert(out_n == s.img_n || out_n == s.img_n+1); 1906 if (stbi_png_partial) y = 1; 1907 a.out_ = cast(ubyte*) malloc(x * y * out_n); 1908 if (!a.out_) throw new STBImageException("Out of memory"); 1909 if (!stbi_png_partial) { 1910 if (s.img_x == x && s.img_y == y) { 1911 if (raw_len != (img_n * x + 1) * y) throw new STBImageException("Not enough pixels, corrupt PNG"); 1912 } else { // interlaced: 1913 if (raw_len < (img_n * x + 1) * y) throw new STBImageException("Not enough pixels, corrupt PNG"); 1914 } 1915 } 1916 for (j=0; j < y; ++j) { 1917 ubyte *cur = a.out_ + 
stride*j; 1918 ubyte *prior = cur - stride; 1919 int filter = *raw++; 1920 if (filter > 4) throw new STBImageException("Invalid filter, corrupt PNG"); 1921 // if first row, use special filter that doesn't sample previous row 1922 if (j == 0) filter = first_row_filter[filter]; 1923 // handle first pixel explicitly 1924 for (k=0; k < img_n; ++k) { 1925 switch (filter) { 1926 case F_none : cur[k] = raw[k]; break; 1927 case F_sub : cur[k] = raw[k]; break; 1928 case F_up : cur[k] = cast(ubyte)(raw[k] + prior[k]); break; 1929 case F_avg : cur[k] = cast(ubyte)(raw[k] + (prior[k]>>1)); break; 1930 case F_paeth : cur[k] = cast(ubyte) (raw[k] + paeth(0,prior[k],0)); break; 1931 case F_avg_first : cur[k] = raw[k]; break; 1932 case F_paeth_first: cur[k] = raw[k]; break; 1933 default: break; 1934 } 1935 } 1936 if (img_n != out_n) cur[img_n] = 255; 1937 raw += img_n; 1938 cur += out_n; 1939 prior += out_n; 1940 // this is a little gross, so that we don't switch per-pixel or per-component 1941 if (img_n == out_n) { 1942 1943 for (i=x-1; i >= 1; --i, raw+=img_n,cur+=img_n,prior+=img_n) 1944 for (k=0; k < img_n; ++k) 1945 { 1946 switch (filter) { 1947 case F_none: cur[k] = raw[k]; break; 1948 case F_sub: cur[k] = cast(ubyte)(raw[k] + cur[k-img_n]); break; 1949 case F_up: cur[k] = cast(ubyte)(raw[k] + prior[k]); break; 1950 case F_avg: cur[k] = cast(ubyte)(raw[k] + ((prior[k] + cur[k-img_n])>>1)); break; 1951 case F_paeth: cur[k] = cast(ubyte) (raw[k] + paeth(cur[k-img_n],prior[k],prior[k-img_n])); break; 1952 case F_avg_first: cur[k] = cast(ubyte)(raw[k] + (cur[k-img_n] >> 1)); break; 1953 case F_paeth_first: cur[k] = cast(ubyte) (raw[k] + paeth(cur[k-img_n],0,0)); break; 1954 default: break; 1955 } 1956 } 1957 } else { 1958 assert(img_n+1 == out_n); 1959 1960 for (i=x-1; i >= 1; --i, cur[img_n]=255,raw+=img_n,cur+=out_n,prior+=out_n) 1961 for (k=0; k < img_n; ++k) 1962 { 1963 switch (filter) { 1964 case F_none: cur[k] = raw[k]; break; 1965 case F_sub: cur[k] = cast(ubyte)(raw[k] + 
cur[k-out_n]); break; 1966 case F_up: cur[k] = cast(ubyte)(raw[k] + prior[k]); break; 1967 case F_avg: cur[k] = cast(ubyte)(raw[k] + ((prior[k] + cur[k-out_n])>>1)); break; 1968 case F_paeth: cur[k] = cast(ubyte) (raw[k] + paeth(cur[k-out_n],prior[k],prior[k-out_n])); break; 1969 case F_avg_first: cur[k] = cast(ubyte)(raw[k] + (cur[k-out_n] >> 1)); break; 1970 case F_paeth_first: cur[k] = cast(ubyte) (raw[k] + paeth(cur[k-out_n],0,0)); break; 1971 default: break; 1972 } 1973 } 1974 } 1975 } 1976 return 1; 1977 } 1978 1979 int create_png_image(png *a, ubyte *raw, uint raw_len, int out_n, int interlaced) 1980 { 1981 ubyte *final_; 1982 int p; 1983 int save; 1984 if (!interlaced) 1985 return create_png_image_raw(a, raw, raw_len, out_n, a.s.img_x, a.s.img_y); 1986 save = stbi_png_partial; 1987 stbi_png_partial = 0; 1988 1989 // de-interlacing 1990 final_ = cast(ubyte*) malloc(a.s.img_x * a.s.img_y * out_n); 1991 for (p=0; p < 7; ++p) { 1992 int xorig[] = [ 0,4,0,2,0,1,0 ]; 1993 int yorig[] = [ 0,0,4,0,2,0,1 ]; 1994 int xspc[] = [ 8,8,4,4,2,2,1 ]; 1995 int yspc[] = [ 8,8,8,4,4,2,2 ]; 1996 int i,j,x,y; 1997 // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1 1998 x = (a.s.img_x - xorig[p] + xspc[p]-1) / xspc[p]; 1999 y = (a.s.img_y - yorig[p] + yspc[p]-1) / yspc[p]; 2000 if (x && y) { 2001 if (!create_png_image_raw(a, raw, raw_len, out_n, x, y)) { 2002 free(final_); 2003 return 0; 2004 } 2005 for (j=0; j < y; ++j) 2006 for (i=0; i < x; ++i) 2007 memcpy(final_ + (j*yspc[p]+yorig[p])*a.s.img_x*out_n + (i*xspc[p]+xorig[p])*out_n, 2008 a.out_ + (j*x+i)*out_n, out_n); 2009 free(a.out_); 2010 raw += (x*out_n+1)*y; 2011 raw_len -= (x*out_n+1)*y; 2012 } 2013 } 2014 a.out_ = final_; 2015 2016 stbi_png_partial = save; 2017 return 1; 2018 } 2019 2020 static int compute_transparency(png *z, ubyte tc[3], int out_n) 2021 { 2022 stbi *s = z.s; 2023 uint i, pixel_count = s.img_x * s.img_y; 2024 ubyte *p = z.out_; 2025 2026 // compute color-based transparency, assuming we've 2027 // 
already got 255 as the alpha value in the output 2028 assert(out_n == 2 || out_n == 4); 2029 2030 if (out_n == 2) { 2031 for (i=0; i < pixel_count; ++i) { 2032 p[1] = (p[0] == tc[0] ? 0 : 255); 2033 p += 2; 2034 } 2035 } else { 2036 for (i=0; i < pixel_count; ++i) { 2037 if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) 2038 p[3] = 0; 2039 p += 4; 2040 } 2041 } 2042 return 1; 2043 } 2044 2045 int expand_palette(png *a, ubyte *palette, int len, int pal_img_n) 2046 { 2047 uint i, pixel_count = a.s.img_x * a.s.img_y; 2048 ubyte *p; 2049 ubyte *temp_out; 2050 ubyte *orig = a.out_; 2051 2052 p = cast(ubyte*) malloc(pixel_count * pal_img_n); 2053 if (p == null) 2054 throw new STBImageException("Out of memory"); 2055 2056 // between here and free(out) below, exitting would leak 2057 temp_out = p; 2058 2059 if (pal_img_n == 3) { 2060 for (i=0; i < pixel_count; ++i) { 2061 int n = orig[i]*4; 2062 p[0] = palette[n ]; 2063 p[1] = palette[n+1]; 2064 p[2] = palette[n+2]; 2065 p += 3; 2066 } 2067 } else { 2068 for (i=0; i < pixel_count; ++i) { 2069 int n = orig[i]*4; 2070 p[0] = palette[n ]; 2071 p[1] = palette[n+1]; 2072 p[2] = palette[n+2]; 2073 p[3] = palette[n+3]; 2074 p += 4; 2075 } 2076 } 2077 free(a.out_); 2078 a.out_ = temp_out; 2079 2080 return 1; 2081 } 2082 2083 int parse_png_file(png *z, int scan, int req_comp) 2084 { 2085 ubyte[1024] palette; 2086 ubyte pal_img_n=0; 2087 ubyte has_trans=0; 2088 ubyte tc[3]; 2089 uint ioff=0, idata_limit=0, i, pal_len=0; 2090 int first=1,k,interlace=0; 2091 stbi *s = z.s; 2092 2093 z.expanded = null; 2094 z.idata = null; 2095 z.out_ = null; 2096 2097 if (!check_png_header(s)) return 0; 2098 2099 if (scan == SCAN_type) return 1; 2100 2101 for (;;) { 2102 chunk c = get_chunk_header(s); 2103 switch (c.type) { 2104 case PNG_TYPE('I','H','D','R'): { 2105 int depth,color,comp,filter; 2106 if (!first) throw new STBImageException("Multiple IHDR, corrupt PNG"); 2107 first = 0; 2108 if (c.length != 13) throw new STBImageException("Bad IHDR 
len, corrupt PNG"); 2109 s.img_x = get32(s); if (s.img_x > (1 << 24)) throw new STBImageException("Very large image (corrupt?)"); 2110 s.img_y = get32(s); if (s.img_y > (1 << 24)) throw new STBImageException("Very large image (corrupt?)"); 2111 depth = get8(s); if (depth != 8) throw new STBImageException("8bit only, PNG not supported: 8-bit only"); 2112 color = get8(s); if (color > 6) throw new STBImageException("Bad ctype, corrupt PNG"); 2113 if (color == 3) pal_img_n = 3; else if (color & 1) throw new STBImageException("Bad ctype, corrupt PNG"); 2114 comp = get8(s); if (comp) throw new STBImageException("Bad comp method, corrupt PNG"); 2115 filter= get8(s); if (filter) throw new STBImageException("Bad filter method, corrupt PNG"); 2116 interlace = get8(s); if (interlace>1) throw new STBImageException("Bad interlace method, corrupt PNG"); 2117 if (!s.img_x || !s.img_y) throw new STBImageException("0-pixel image, corrupt PNG"); 2118 if (!pal_img_n) { 2119 s.img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); 2120 if ((1 << 30) / s.img_x / s.img_n < s.img_y) throw new STBImageException("Image too large to decode"); 2121 if (scan == SCAN_header) return 1; 2122 } else { 2123 // if paletted, then pal_n is our final components, and 2124 // img_n is # components to decompress/filter. 
2125 s.img_n = 1; 2126 if ((1 << 30) / s.img_x / 4 < s.img_y) throw new STBImageException("Too large, corrupt PNG"); 2127 // if SCAN_header, have to scan to see if we have a tRNS 2128 } 2129 break; 2130 } 2131 2132 case PNG_TYPE('P','L','T','E'): { 2133 if (first) throw new STBImageException("first not IHDR, corrupt PNG"); 2134 if (c.length > 256*3) throw new STBImageException("invalid PLTE, corrupt PNG"); 2135 pal_len = c.length / 3; 2136 if (pal_len * 3 != c.length) throw new STBImageException("invalid PLTE, corrupt PNG"); 2137 for (i=0; i < pal_len; ++i) { 2138 palette[i*4+0] = get8u(s); 2139 palette[i*4+1] = get8u(s); 2140 palette[i*4+2] = get8u(s); 2141 palette[i*4+3] = 255; 2142 } 2143 break; 2144 } 2145 2146 case PNG_TYPE('t','R','N','S'): { 2147 if (first) throw new STBImageException("first not IHDR, cCorrupt PNG"); 2148 if (z.idata) throw new STBImageException("tRNS after IDAT, corrupt PNG"); 2149 if (pal_img_n) { 2150 if (scan == SCAN_header) { s.img_n = 4; return 1; } 2151 if (pal_len == 0) throw new STBImageException("tRNS before PLTE, corrupt PNG"); 2152 if (c.length > pal_len) throw new STBImageException("bad tRNS len, corrupt PNG"); 2153 pal_img_n = 4; 2154 for (i=0; i < c.length; ++i) 2155 palette[i*4+3] = get8u(s); 2156 } else { 2157 if (!(s.img_n & 1)) throw new STBImageException("tRNS with alpha, corrupt PNG"); 2158 if (c.length != cast(uint) s.img_n*2) throw new STBImageException("bad tRNS len, corrupt PNG"); 2159 has_trans = 1; 2160 for (k=0; k < s.img_n; ++k) 2161 tc[k] = cast(ubyte) get16(s); // non 8-bit images will be larger 2162 } 2163 break; 2164 } 2165 2166 case PNG_TYPE('I','D','A','T'): { 2167 if (first) throw new STBImageException("first not IHDR, corrupt PNG"); 2168 if (pal_img_n && !pal_len) throw new STBImageException("no PLTE, corrupt PNG"); 2169 if (scan == SCAN_header) { s.img_n = pal_img_n; return 1; } 2170 if (ioff + c.length > idata_limit) { 2171 ubyte *p; 2172 if (idata_limit == 0) idata_limit = c.length > 4096 ? 
c.length : 4096;
                    while (ioff + c.length > idata_limit)
                        idata_limit *= 2;
                    p = cast(ubyte*) realloc(z.idata, idata_limit); if (p == null) throw new STBImageException("outofmem, cOut of memory");
                    z.idata = p;
                }
                if (!getn(s, z.idata+ioff,c.length)) throw new STBImageException("outofdata, corrupt PNG");
                ioff += c.length;
                break;
            }

            case PNG_TYPE('I','E','N','D'): {
                uint raw_len;
                if (first) throw new STBImageException("first not IHDR, corrupt PNG");
                if (scan != SCAN_load) return 1;
                if (z.idata == null) throw new STBImageException("no IDAT, corrupt PNG");
                z.expanded = stbi_zlib_decode_malloc_guesssize_headerflag(z.idata, ioff, 16384, cast(int *) &raw_len, 1);
                if (z.expanded == null) return 0; // zlib should set error
                free(z.idata); z.idata = null;
                if ((req_comp == s.img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
                    s.img_out_n = s.img_n+1;
                else
                    s.img_out_n = s.img_n;
                if (!create_png_image(z, z.expanded, raw_len, s.img_out_n, interlace)) return 0;
                if (has_trans)
                    if (!compute_transparency(z, tc, s.img_out_n)) return 0;
                if (pal_img_n) {
                    // pal_img_n == 3 or 4
                    s.img_n = pal_img_n; // record the actual colors we had
                    s.img_out_n = pal_img_n;
                    if (req_comp >= 3) s.img_out_n = req_comp;
                    if (!expand_palette(z, palette.ptr, pal_len, s.img_out_n))
                        return 0;
                }
                free(z.expanded); z.expanded = null;
                return 1;
            }

            default:
                // if critical, fail
                if (first) throw new STBImageException("first not IHDR, corrupt PNG");
                if ((c.type & (1 << 29)) == 0) {

                    throw new STBImageException("PNG not supported: unknown chunk type");
                }
                skip(s, c.length);
                break;
        }
        // end of chunk, read and skip CRC
        get32(s);
    }
}

/// Runs a full PNG decode on `p` and hands the pixel buffer to the caller.
///
/// Params:
///   p        = decoder state; its `s` member is the input context.
///   x, y     = receive the image width and height on success.
///   n        = if non-null, receives the component count recorded in `img_n`.
///   req_comp = requested component count, 0 (keep as decoded) to 4.
/// Returns: the decoded buffer (ownership passes to the caller), or null when
///          parsing or the format conversion reports failure by return value.
/// Throws: STBImageException when `req_comp` is out of range; exceptions raised
///         by the parsing code propagate unchanged.
ubyte *do_png(png *p, int *x, int *y, int *n, int req_comp)
{
    ubyte *result = null;
    if (req_comp < 0 || req_comp > 4)
        throw new STBImageException("Internal error: bad req_comp");

    // Release the decoder's scratch buffers on every exit path, including when
    // an exception propagates out of the calls below. On the success path the
    // output buffer has already been detached from `p`, so it is not freed here.
    scope(exit)
    {
        free(p.out_);     p.out_ = null;
        free(p.expanded); p.expanded = null;
        free(p.idata);    p.idata = null;
    }

    if (parse_png_file(p, SCAN_load, req_comp)) {
        result = p.out_;
        p.out_ = null;
        if (req_comp && req_comp != p.s.img_out_n) {
            result = convert_format(result, p.s.img_out_n, req_comp, p.s.img_x, p.s.img_y);
            p.s.img_out_n = req_comp;
            if (result == null) return result;
        }
        *x = p.s.img_x;
        *y = p.s.img_y;
        if (n) *n = p.s.img_n;
    }

    return result;
}

/// Decodes a PNG from `s` using a fresh decoder state; see do_png for the
/// meaning of the remaining parameters and the return value.
ubyte *stbi_png_load(stbi *s, int *x, int *y, int *comp, int req_comp)
{
    png p;
    p.s = s;
    return do_png(&p, x,y,comp,req_comp);
}

/// Throws STBImageException unless check_png_header accepts the stream.
/// Consumes input; callers rewind afterwards.
void stbi_png_test(stbi *s)
{
    int r = check_png_header(s);
    if (r == 0)
        throw new STBImageException("Couldn't decode PNG header");
}

// Microsoft/Windows BMP image

/// Throws STBImageException unless the stream starts with "BM" followed by a
/// header whose size field is one of the values bmp_load accepts
/// (12, 40, 56 or 108). Consumes input; callers rewind afterwards.
void stbi_bmp_test(stbi *s)
{
    if (get8(s) != 'B') throw new STBImageException("Couldn't decode BMP header");
    if (get8(s) != 'M') throw new STBImageException("Couldn't decode BMP header");
    get32le(s); // discard filesize
    get16le(s); // discard reserved
    get16le(s); // discard reserved
    get32le(s); // discard data offset
    int sz = get32le(s);
    if (sz == 12 || sz == 40 || sz == 56 || sz == 108)
        return;

    throw new STBImageException("Couldn't decode BMP header");
}


/// Returns the index (0..31) of the highest set bit of `z`, or -1 when `z` is 0.
int high_bit(uint z)
{
    int n=0;
    if (z == 0) return -1;
    // binary search: halve the candidate range at each step
    if (z >= 0x10000) { n += 16; z >>= 16; }
    if (z >= 0x00100) { n +=  8; z >>=  8; }
    if (z >= 0x00010) { n +=  4; z >>=  4; }
    if (z >= 0x00004) { n +=  2; z >>=  2; }
    if (z >= 0x00002) { n +=  1; z >>=  1; }
    return n;
}

/// Returns the number of set bits in `a`.
int bitcount(uint a)
{
    a = (a & 0x55555555) + ((a >> 1) & 0x55555555); // max 2
    a = (a & 0x33333333) + ((a >> 2) & 0x33333333); // max 4
    a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits
    a = (a + (a >> 8)); // max 16 per 8 bits
    a = (a + (a >> 16)); // max 32 per 8 bits
    return a & 0xff;
}

/// Shifts `v` right by `shift` (left when `shift` is negative), then adds
/// copies of the shifted value moved right by `bits`, 2*`bits`, ... while that
/// offset is below 8. bmp_load uses it to widen a masked channel of `bits`
/// significant bits to 8 bits.
///
/// `bits` must be positive: the loop advances by `bits` and would not
/// terminate for 0.
int shiftsigned(int v, int shift, int bits)
{
    int result;
    int z=0;

    if (shift < 0) v <<= -shift;
    else v >>= shift;
    result = v;

    z = bits;
    while (z < 8) {
        result += v >> z;
        z += bits;
    }
    return result;
}

/// Decodes a BMP image from `s`.
///
/// Accepts header sizes 12, 40, 56 and 108; rejects 1 bit-per-pixel and RLE
/// (compression 1 or 2) images.
///
/// Params:
///   s        = input context; `img_x`, `img_y` and `img_n` are filled in.
///   x, y     = receive the image width and height.
///   comp     = if non-null, receives 4 when an alpha mask is present, else 3.
///   req_comp = requested component count; 0 keeps the file's own count.
/// Returns: a malloc'ed pixel buffer owned by the caller, or null if the final
///          convert_format call returns null.
/// Throws: STBImageException on unsupported or malformed input and when the
///         output buffer cannot be allocated.
ubyte *bmp_load(stbi *s, int *x, int *y, int *comp, int req_comp)
{
    ubyte *out_;
    uint mr=0,mg=0,mb=0,ma=0, fake_a=0;
    ubyte pal[256][4];
    int psize=0,i,j,compress=0,width;
    int bpp, flip_vertically, pad, target, offset, hsz;
    if (get8(s) != 'B' || get8(s) != 'M') throw new STBImageException("not BMP, Corrupt BMP");
    get32le(s); // discard filesize
    get16le(s); // discard reserved
    get16le(s); // discard reserved
    offset = get32le(s);
    hsz = get32le(s);
    if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108) throw new STBImageException("unknown BMP, BMP type not supported: unknown");
    if (hsz == 12) {
        s.img_x = get16le(s);
        s.img_y = get16le(s);
    } else {
        s.img_x = get32le(s);
        s.img_y = get32le(s);
    }
    if (get16le(s) != 1) throw new STBImageException("bad BMP");
    bpp = get16le(s);
    if (bpp == 1) throw new STBImageException("monochrome, BMP type not supported: 1-bit");
    // a positive height means the rows are flipped after decoding (see below)
    flip_vertically = (cast(int) s.img_y) > 0;
    s.img_y = abs(cast(int) s.img_y);
    if (hsz == 12) {
        if (bpp < 24)
            psize = (offset - 14 - 24) / 3;
    } else {
        compress = get32le(s);
        if (compress == 1 || compress == 2) throw new STBImageException("BMP RLE, BMP type not supported: RLE");
        get32le(s); // discard sizeof
        get32le(s); // discard hres
        get32le(s); // discard vres
        get32le(s); // discard colorsused
        get32le(s); // discard max important
        if (hsz == 40 || hsz == 56) {
            if (hsz == 56) {
                get32le(s);
                get32le(s);
                get32le(s);
                get32le(s);
            }
            if (bpp == 16 || bpp == 32) {
                mr = mg = mb = 0;
                if (compress == 0) {
                    if (bpp == 32) {
                        mr = 0xffu << 16;
                        mg = 0xffu <<  8;
                        mb = 0xffu <<  0;
                        ma = 0xffu << 24;
                        fake_a = 1; // @TODO: check for cases like alpha value is all 0 and switch it to 255
                    } else {
                        mr = 31u << 10;
                        mg = 31u <<  5;
                        mb = 31u <<  0;
                    }
                } else if (compress == 3) {
                    mr = get32le(s);
                    mg = get32le(s);
                    mb = get32le(s);
                    // not documented, but generated by photoshop and handled by mspaint
                    if (mr == mg && mg == mb) {
                        // ?!?!?
                        throw new STBImageException("bad BMP");
                    }
                } else
                    throw new STBImageException("bad BMP");
            }
        } else {
            assert(hsz == 108);
            mr = get32le(s);
            mg = get32le(s);
            mb = get32le(s);
            ma = get32le(s);
            get32le(s); // discard color space
            for (i=0; i < 12; ++i)
                get32le(s); // discard color space parameters
        }
        if (bpp < 16)
            psize = (offset - 14 - hsz) >> 2;
    }
    s.img_n = ma ? 4 : 3;
    if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
        target = req_comp;
    else
        target = s.img_n; // if they want monochrome, we'll post-convert
    out_ = cast(ubyte*) malloc(target * s.img_x * s.img_y);
    if (!out_) throw new STBImageException("Out of memory");
    if (bpp < 16) {
        // paletted image: read the palette, then expand indices to colors
        int z=0;
        if (psize == 0 || psize > 256) { free(out_); throw new STBImageException("invalid, Corrupt BMP"); }
        for (i=0; i < psize; ++i) {
            pal[i][2] = get8u(s);
            pal[i][1] = get8u(s);
            pal[i][0] = get8u(s);
            if (hsz != 12) get8(s);
            pal[i][3] = 255;
        }
        skip(s, offset - 14 - hsz - psize * (hsz == 12 ? 3 : 4));
        if (bpp == 4) width = (s.img_x + 1) >> 1;
        else if (bpp == 8) width = s.img_x;
        else { free(out_); throw new STBImageException("bad bpp, corrupt BMP"); }
        pad = (-width)&3;
        for (j=0; j < cast(int) s.img_y; ++j) {
            // two pixels per iteration: one byte holds two 4-bit indices
            for (i=0; i < cast(int) s.img_x; i += 2) {
                int v=get8(s),v2=0;
                if (bpp == 4) {
                    v2 = v & 15;
                    v >>= 4;
                }
                out_[z++] = pal[v][0];
                out_[z++] = pal[v][1];
                out_[z++] = pal[v][2];
                if (target == 4) out_[z++] = 255;
                if (i+1 == cast(int) s.img_x) break;
                v = (bpp == 8) ? get8(s) : v2;
                out_[z++] = pal[v][0];
                out_[z++] = pal[v][1];
                out_[z++] = pal[v][2];
                if (target == 4) out_[z++] = 255;
            }
            skip(s, pad);
        }
    } else {
        int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
        int z = 0;
        int easy=0;
        skip(s, offset - 14 - hsz);
        if (bpp == 24) width = 3 * s.img_x;
        else if (bpp == 16) width = 2*s.img_x;
        else /* bpp = 32 and pad = 0 */ width=0;
        pad = (-width) & 3;
        if (bpp == 24) {
            easy = 1;
        } else if (bpp == 32) {
            if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
                easy = 2;
        }
        if (!easy) {
            if (!mr || !mg || !mb) { free(out_); throw new STBImageException("bad masks, corrupt BMP"); }
            // right shift amt to put high bit in position #7
            // Each channel's bit count comes from its OWN mask. acount is 0
            // when there is no alpha mask, but it is only used when ma != 0.
            rshift = high_bit(mr)-7; rcount = bitcount(mr);
            gshift = high_bit(mg)-7; gcount = bitcount(mg);
            bshift = high_bit(mb)-7; bcount = bitcount(mb);
            ashift = high_bit(ma)-7; acount = bitcount(ma);
        }
        for (j=0; j < cast(int) s.img_y; ++j) {
            if (easy) {
                for (i=0; i < cast(int) s.img_x; ++i) {
                    int a;
                    out_[z+2] = get8u(s);
                    out_[z+1] = get8u(s);
                    out_[z+0] = get8u(s);
                    z += 3;
                    a = (easy == 2 ? get8(s) : 255);
                    if (target == 4) out_[z++] = cast(ubyte) a;
                }
            } else {
                for (i=0; i < cast(int) s.img_x; ++i) {
                    uint v = (bpp == 16 ? get16le(s) : get32le(s));
                    int a;
                    out_[z++] = cast(ubyte) shiftsigned(v & mr, rshift, rcount);
                    out_[z++] = cast(ubyte) shiftsigned(v & mg, gshift, gcount);
                    out_[z++] = cast(ubyte) shiftsigned(v & mb, bshift, bcount);
                    a = (ma ? shiftsigned(v & ma, ashift, acount) : 255);
                    if (target == 4) out_[z++] = cast(ubyte) a;
                }
            }
            skip(s, pad);
        }
    }
    if (flip_vertically) {
        // swap row j with row (height-1-j)
        ubyte t;
        for (j=0; j < cast(int) s.img_y>>1; ++j) {
            ubyte *p1 = out_ + j *s.img_x*target;
            ubyte *p2 = out_ + (s.img_y-1-j)*s.img_x*target;
            for (i=0; i < cast(int) s.img_x*target; ++i) {
                t = p1[i];
                p1[i] = p2[i];
                p2[i] = t;
            }
        }
    }

    if (req_comp && req_comp != target) {
        out_ = convert_format(out_, target, req_comp, s.img_x, s.img_y);
        if (out_ == null) return out_; // convert_format frees input on failure
    }

    *x = s.img_x;
    *y = s.img_y;
    if (comp) *comp = s.img_n;
    return out_;
}

/// Public entry point for BMP decoding; forwards to bmp_load.
ubyte *stbi_bmp_load(stbi *s, int *x, int *y, int *comp, int req_comp)
{
    return bmp_load(s, x,y,comp,req_comp);
}

// *************************************************************************************************
// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb

/// One entry of the GIF LZW code table.
struct stbi_gif_lzw
{
    short prefix; // index of the code this one extends, or -1 for a root code
    ubyte first;  // first symbol of the string this code expands to
    ubyte suffix; // last symbol of the string this code expands to
}

/// State of the GIF decoder.
struct stbi_gif
{
    int w,h;
    ubyte *out_;                 // output buffer (always 4 components)
    int flags, bgindex, ratio, transparent, eflags;
    ubyte pal[256][4];           // global color table
    ubyte lpal[256][4];          // local color table of the current image
    stbi_gif_lzw codes[4096];
    ubyte *color_table;          // points at pal or lpal while decoding a raster
    int parse, step;
    int lflags;
    // The positions below are byte offsets into out_: x values count bytes
    // within a row, y values count bytes to the start of a row.
    int start_x, start_y;
    int max_x, max_y;
    int cur_x, cur_y;
    int line_size;
}

/// Throws STBImageException unless the stream starts with "GIF87a" or "GIF89a".
/// Consumes input; callers rewind afterwards.
void stbi_gif_test(stbi *s)
{
    int sz;
    if (get8(s) != 'G' || get8(s) != 'I' || get8(s) != 'F' || get8(s) != '8')
        throw new STBImageException("Couldn't decode GIF header");
    sz = get8(s);
    if (sz != '9' && sz != '7')
        throw new STBImageException("Couldn't decode GIF header");
    if (get8(s) != 'a')
        throw new STBImageException("Couldn't decode GIF header");
}

/// Reads `num_entries` three-byte color entries from `s` into `pal`.
///
/// Params:
///   s           = input context.
///   pal         = table to fill. Taken by `ref` because D passes static
///                 arrays by value, so without it the caller's table would be
///                 left untouched.
///   num_entries = number of entries to read.
///   transp      = index of the entry to mark fully transparent (alpha 0), or
///                 -1 for none; every other entry gets alpha 255.
void stbi_gif_parse_colortable(stbi *s, ref ubyte[4][256] pal, int num_entries, int transp)
{
    int i;
    for (i=0; i < num_entries; ++i) {
        pal[i][2] = get8u(s);
        pal[i][1] = get8u(s);
        pal[i][0] = get8u(s);
        pal[i][3] = (transp == i) ? 0 : 255;
    }
}

/// Parses the GIF signature and logical screen descriptor into `g`.
///
/// Params:
///   s       = input context.
///   g       = receives width, height, flags, background index and ratio;
///             `transparent` is reset to -1.
///   comp    = if non-null, set to 4.
///   is_info = when non-zero, return before reading the global color table.
/// Returns: 1.
/// Throws: STBImageException when the signature is not "GIF87a"/"GIF89a".
int stbi_gif_header(stbi *s, stbi_gif *g, int *comp, int is_info)
{
    ubyte version_;
    if (get8(s) != 'G' || get8(s) != 'I' || get8(s) != 'F' || get8(s) != '8')
        throw new STBImageException("not GIF, corrupt GIF");

    version_ = get8u(s);
    if (version_ != '7' && version_ != '9') throw new STBImageException("not GIF, corrupt GIF");
    if (get8(s) != 'a') throw new STBImageException("not GIF, corrupt GIF");

    g.w = get16le(s);
    g.h = get16le(s);
    g.flags = get8(s);
    g.bgindex = get8(s);
    g.ratio = get8(s);
    g.transparent = -1;

    if (comp != null) *comp = 4; // can't actually tell whether it's 3 or 4 until we parse the comments

    if (is_info) return 1;

    if (g.flags & 0x80)
        stbi_gif_parse_colortable(s,g.pal, 2 << (g.flags & 7), -1);

    return 1;
}

/// Writes the pixels that LZW code `code` expands to into `g.out_`, advancing
/// the cursor in `g` (including the interlace pass bookkeeping).
void stbi_out_gif_code(stbi_gif *g, ushort code)
{
    ubyte *p;
    ubyte *c;

    // recurse to decode the prefixes, since the linked-list is backwards,
    // and working backwards through an interleaved image would be nasty
    if (g.codes[code].prefix >= 0)
        stbi_out_gif_code(g, g.codes[code].prefix);

    if (g.cur_y >= g.max_y) return;

    p = (&g.out_[g.cur_x + g.cur_y]);
    c = &g.color_table[g.codes[code].suffix * 4];

    // only write pixels whose palette alpha is at least 128; others keep
    // whatever the canvas already holds
    if (c[3] >= 128) {
        p[0] = c[2];
        p[1] = c[1];
        p[2] = c[0];
        p[3] = c[3];
    }
    g.cur_x += 4;

    if (g.cur_x >= g.max_x) {
        g.cur_x = g.start_x;
        g.cur_y += g.step;

        // end of an interlace pass: move on to the next, finer one
        while (g.cur_y >= g.max_y && g.parse > 0) {
            g.step = (1 << g.parse) * g.line_size;
            g.cur_y = g.start_y + (g.step >> 1);
            --g.parse;
        }
    }
}

/// Decodes one LZW-compressed raster from `s` into `g.out_`.
///
/// Returns: `g.out_`, once the end-of-stream code or a zero-length data block
///          is reached.
/// Throws: STBImageException on a malformed code stream.
ubyte *stbi_process_gif_raster(stbi *s, stbi_gif *g)
{
    ubyte lzw_cs;
    int len, code;
    uint first;
    int codesize, codemask, avail, oldcode, bits, valid_bits, clear;
    stbi_gif_lzw *p;

    lzw_cs = get8u(s);
    // The code table has 4096 (1 << 12) entries; a larger initial code size
    // would overrun it in the initialisation loop below.
    if (lzw_cs > 12) throw new STBImageException("bad LZW code size, corrupt GIF");
    clear = 1 << lzw_cs;
    first = 1;
    codesize = lzw_cs + 1;
    codemask = (1 << codesize) - 1;
    bits = 0;
    valid_bits = 0;
    for (code = 0; code < clear; code++) {
        g.codes[code].prefix = -1;
        g.codes[code].first = cast(ubyte) code;
        g.codes[code].suffix = cast(ubyte) code;
    }

    // support no starting clear code
    avail = clear+2;
    oldcode = -1;

    len = 0;
    for(;;) {
        if (valid_bits < codesize) {
            if (len == 0) {
                len = get8(s); // start new block
                if (len == 0)
                    return g.out_;
            }
            --len;
            bits |= cast(int) get8(s) << valid_bits;
            valid_bits += 8;
        } else {
            int code_ = bits & codemask;
            bits >>= codesize;
            valid_bits -= codesize;
            // @OPTIMIZE: is there some way we can accelerate the non-clear path?
            if (code_ == clear) { // clear code
                codesize = lzw_cs + 1;
                codemask = (1 << codesize) - 1;
                avail = clear + 2;
                oldcode = -1;
                first = 0;
            } else if (code_ == clear + 1) { // end of stream code
                skip(s, len);
                while ((len = get8(s)) > 0)
                    skip(s,len);
                return g.out_;
            } else if (code_ <= avail) {
                if (first) throw new STBImageException("no clear code, corrupt GIF");

                if (oldcode >= 0) {
                    // check the table limit BEFORE indexing, so a full table
                    // is reported as a corrupt file rather than a range error
                    if (avail >= 4096) throw new STBImageException("too many codes, corrupt GIF");
                    p = &g.codes[avail++];
                    p.prefix = cast(short) oldcode;
                    p.first = g.codes[oldcode].first;
                    p.suffix = (code_ == avail) ? p.first : g.codes[code_].first;
                } else if (code_ == avail)
                    throw new STBImageException("illegal code in raster, corrupt GIF");

                // emit the code just decoded (code_), not the counter used
                // by the table initialisation loop above
                stbi_out_gif_code(g, cast(ushort) code_);

                if ((avail & codemask) == 0 && avail <= 0x0FFF) {
                    codesize++;
                    codemask = (1 << codesize) - 1;
                }

                oldcode = code_;
            } else {
                throw new STBImageException("illegal code in raster, corrupt GIF");
            }
        }
    }
}

/// Fills the whole output canvas with the global palette entry `g.bgindex`.
void stbi_fill_gif_background(stbi_gif *g)
{
    int i;
    ubyte *c = g.pal[g.bgindex].ptr;
    // @OPTIMIZE: write a dword at a time
    for (i = 0; i < g.w * g.h * 4; i += 4) {
        ubyte *p = &g.out_[i];
        p[0] = c[2];
        p[1] = c[1];
        p[2] = c[0];
        p[3] = c[3];
    }
}

// this function is designed to support animated gifs, although stb_image doesn't support it
/// Decodes the next image of a GIF stream.
///
/// On the first call (`g.out_` is null) the header is parsed and the canvas is
/// allocated and filled with the background color.
///
/// Returns: the decoded pixels (converted to `req_comp` components when that
///          is non-zero and not 4), null on failure, or the sentinel pointer
///          value 1 when the stream terminator is reached.
/// Throws: STBImageException on malformed input or allocation failure.
ubyte *stbi_gif_load_next(stbi *s, stbi_gif *g, int *comp, int req_comp)
{
    int i;
    ubyte *old_out = null;

    if (g.out_ == null) {
        if (!stbi_gif_header(s, g, comp,0)) return null; // stbi_gif_header reports failure itself
        g.out_ = cast(ubyte*) malloc(4 * g.w * g.h);
        if (g.out_ == null) throw new STBImageException("Out of memory");
        stbi_fill_gif_background(g);
    } else {
        // animated-gif-only path
        if (((g.eflags & 0x1C) >> 2) == 3) {
            old_out = g.out_;
            g.out_ = cast(ubyte*) malloc(4 * g.w * g.h);
            if (g.out_ == null) throw new STBImageException("Out of memory");
            memcpy(g.out_, old_out, g.w*g.h*4);
        }
    }

    for (;;) {
        switch (get8(s)) {
            case 0x2C: /* Image Descriptor */
            {
                int x, y, w, h;
                ubyte *o;

                x = get16le(s);
                y = get16le(s);
                w = get16le(s);
                h = get16le(s);
                if (((x + w) > (g.w)) || ((y + h) > (g.h)))
                    throw new STBImageException("bad Image Descriptor, corrupt GIF");

                // all cursor positions are kept as byte offsets into out_
                g.line_size = g.w * 4;
                g.start_x = x * 4;
                g.start_y = y * g.line_size;
                g.max_x   = g.start_x + w * 4;
                g.max_y   = g.start_y + h * g.line_size;
                g.cur_x   = g.start_x;
                g.cur_y   = g.start_y;

                g.lflags = get8(s);

                if (g.lflags & 0x40) {
                    g.step = 8 * g.line_size; // first interlaced spacing
                    g.parse = 3;
                } else {
                    g.step = g.line_size;
                    g.parse = 0;
                }

                if (g.lflags & 0x80) {
                    stbi_gif_parse_colortable(s,g.lpal, 2 << (g.lflags & 7), g.eflags & 0x01 ? g.transparent : -1);
                    g.color_table = &g.lpal[0][0];
                } else if (g.flags & 0x80) {
                    for (i=0; i < 256; ++i)  // @OPTIMIZE: reset only the previous transparent
                        g.pal[i][3] = 255;
                    if (g.transparent >= 0 && (g.eflags & 0x01))
                        g.pal[g.transparent][3] = 0;
                    g.color_table = &g.pal[0][0];
                } else
                    throw new STBImageException("missing color table, corrupt GIF");

                o = stbi_process_gif_raster(s, g);
                if (o == null) return null;

                if (req_comp && req_comp != 4)
                    o = convert_format(o, 4, req_comp, g.w, g.h);
                return o;
            }

            case 0x21: // Comment Extension.
            {
                int len;
                if (get8(s) == 0xF9) { // Graphic Control Extension.
                    len = get8(s);
                    if (len == 4) {
                        g.eflags = get8(s);
                        get16le(s); // delay
                        g.transparent = get8(s);
                    } else {
                        skip(s, len);
                        break;
                    }
                }
                // skip the remaining sub-blocks up to the zero-length terminator
                while ((len = get8(s)) != 0)
                    skip(s, len);
                break;
            }

            case 0x3B: // gif stream termination code
                return cast(ubyte*) 1;

            default:
                throw new STBImageException("unknown code, corrupt GIF");
        }
    }
}

/// Decodes the first image of a GIF stream.
///
/// Params:
///   s        = input context.
///   x, y     = receive the logical screen width and height on success.
///   comp     = passed on to the header parser, which sets it to 4 if non-null.
///   req_comp = requested component count; 0 keeps 4 components.
/// Returns: the pixel buffer owned by the caller, or null when the stream
///          holds no image or decoding fails by return value.
ubyte *stbi_gif_load(stbi *s, int *x, int *y, int *comp, int req_comp)
{
    ubyte *u = null;
    stbi_gif g={0};

    u = stbi_gif_load_next(s, &g, comp, req_comp);
    if (u == cast(void *) 1) {
        // end of animated gif marker: no image was decoded. The canvas
        // allocated by stbi_gif_load_next is not handed to anyone on this
        // path, so release it here.
        free(g.out_);
        g.out_ = null;
        u = null;
    }
    if (u) {
        *x = g.w;
        *y = g.h;
    }

    return u;
}
