1 /// D translation of stb_image-1.33 (http://nothings.org/stb_image.c) 2 /// 3 /// This port only supports: 4 /// $(UL 5 /// $(LI PNG 8-bit-per-channel only.) 6 /// $(LI JPEG baseline (no JPEG progressive).) 7 /// $(LI GIF.) 8 /// $(LI BMP non-1bpp, non-RLE.) 9 /// ) 10 /// 11 /// TODO: 12 /// $(UL 13 /// $(LI Support a range as input.) 14 /// ) 15 16 //============================ Contributors ========================= 17 // 18 // Image formats Optimizations & bugfixes 19 // Sean Barrett (jpeg, png, bmp) Fabian "ryg" Giesen 20 // Nicolas Schulz (hdr, psd) 21 // Jonathan Dummer (tga) Bug fixes & warning fixes 22 // Jean-Marc Lienher (gif) Marc LeBlanc 23 // Tom Seddon (pic) Christpher Lloyd 24 // Thatcher Ulrich (psd) Dave Moore 25 // Won Chun 26 // the Horde3D community 27 // Extensions, features Janez Zemva 28 // Jetro Lauha (stbi_info) Jonathan Blow 29 // James "moose2000" Brown (iPhone PNG) Laurent Gomila 30 // Ben "Disch" Wenger (io callbacks) Aruelien Pocheville 31 // Martin "SpartanJ" Golini Ryamond Barbiero 32 // David Woo 33 34 module gfm.image.stb_image; 35 36 import core.stdc.stdlib; 37 import core.stdc..string; 38 39 import ae.utils.graphics.image; 40 import ae.utils.graphics.color; 41 42 43 44 /// The one function you probably want to use. 45 /// Loads an image from a static array. 46 /// Because probing has been removed from stb_image, parsing is optimistic 47 /// and might throw internally before finding the right image format. 48 /// Throws: $(D STBImageException) on error. 49 Image!RGBA loadImage(const(ubyte[]) imageData) 50 { 51 import gfm.image.stb_image; 52 import core.stdc..string; 53 54 void[] data = cast(void[])imageData; 55 int width, height, components; 56 ubyte* decoded = stbi_load_from_memory(data, width, height, components, 4); 57 scope(exit) stbi_image_free(decoded); 58 59 // stb_image guarantees that ouput will always have 4 components when asked 60 // Fortunately they are already RGBA 61 62 // allocates result 63 Image!RGBA loaded; 64 loaded.size(width, height); 65 66 // copy pixels (here they are contiguous in each case) 67 memcpy(loaded.pixels.ptr, decoded, width * height * 4); 68 return loaded; // this uses the GC to give up ownership 69 } 70 71 enum STBI_VERSION = 1; 72 73 /// The exception type thrown when loading an image failed. 74 class STBImageException : Exception 75 { 76 public 77 { 78 @safe pure nothrow this(string message, string file =__FILE__, size_t line = __LINE__, Throwable next = null) 79 { 80 super(message, file, line, next); 81 } 82 } 83 } 84 85 enum : int 86 { 87 STBI_default = 0, // only used for req_comp 88 STBI_grey = 1, 89 STBI_grey_alpha = 2, 90 STBI_rgb = 3, 91 STBI_rgb_alpha = 4 92 }; 93 94 // define faster low-level operations (typically SIMD support) 95 96 97 uint stbi_lrot(uint x, uint y) 98 { 99 return (x << y) | (x >> (32 - y)); 100 } 101 102 // stbi structure is our basic context used by all images, so it 103 // contains all the IO context, plus some basic image information 104 struct stbi 105 { 106 uint img_x, img_y; 107 int img_n, img_out_n; 108 109 int buflen; 110 ubyte[128] buffer_start; 111 112 const(ubyte) *img_buffer; 113 const(ubyte) *img_buffer_end; 114 const(ubyte) *img_buffer_original; 115 } 116 117 118 // initialize a memory-decode context 119 void start_mem(stbi *s, const(ubyte)*buffer, int len) 120 { 121 s.img_buffer = buffer; 122 s.img_buffer_original = buffer; 123 s.img_buffer_end = buffer+len; 124 } 125 126 void stbi_rewind(stbi *s) 127 { 128 // conceptually rewind SHOULD rewind to the beginning of the stream, 129 // but we just rewind to the beginning of the initial buffer, because 130 // we only use it after doing 'test', which only ever looks at at most 92 bytes 131 s.img_buffer = s.img_buffer_original; 132 } 133 134 135 ubyte *stbi_load_main(stbi *s, int *x, int *y, int *comp, int req_comp) 136 { 137 try 138 { 139 stbi_jpeg_test(s); 140 stbi_rewind(s); 141 return stbi_jpeg_load(s,x,y,comp,req_comp); 142 } 143 catch(STBImageException e) 144 { 145 stbi_rewind(s); 146 } 147 148 try 149 { 150 stbi_png_test(s); 151 stbi_rewind(s); 152 return stbi_png_load(s,x,y,comp,req_comp); 153 } 154 catch(STBImageException e) 155 { 156 stbi_rewind(s); 157 } 158 159 try 160 { 161 stbi_bmp_test(s); 162 stbi_rewind(s); 163 return stbi_bmp_load(s,x,y,comp,req_comp); 164 } 165 catch(STBImageException e) 166 { 167 stbi_rewind(s); 168 } 169 170 try 171 { 172 stbi_gif_test(s); 173 stbi_rewind(s); 174 return stbi_gif_load(s,x,y,comp,req_comp); 175 } 176 catch(STBImageException e) 177 { 178 stbi_rewind(s); 179 } 180 181 throw new STBImageException("Image not of any known type, or corrupt"); 182 } 183 184 /// Loads an image from memory. 185 /// Throws: STBImageException on error. 186 ubyte* stbi_load_from_memory(void[] buffer, out int width, out int height, out int components, int requestedComponents) 187 { 188 stbi s; 189 start_mem(&s, cast(ubyte*)buffer.ptr, cast(int)(buffer.length)); 190 return stbi_load_main(&s, &width, &height, &components, requestedComponents); 191 } 192 193 /// Frees an image loaded by stb_image. 194 void stbi_image_free(void *retval_from_stbi_load) 195 { 196 free(retval_from_stbi_load); 197 } 198 199 /// Load an image from memory and puts it in a ae.utils.graphics.image.Image. 200 /// Throws: STBImageException on error. 201 Image!RGBA stbiLoadImageAE(void[] buffer) 202 { 203 int width, height, components; 204 ubyte* data = stbi_load_from_memory(buffer, width, height, components, 4); 205 ubyte[] range = data[0..width*height*4]; 206 scope(exit) stbi_image_free(data); 207 208 auto result = Image!RGBA(width, height); 209 size_t length = width * height * RGBA.sizeof; 210 result.pixels[] = cast(RGBA[])(data[0..length]); 211 return result; 212 } 213 214 // 215 // Common code used by all image loaders 216 // 217 218 enum : int 219 { 220 SCAN_load=0, 221 SCAN_type, 222 SCAN_header 223 }; 224 225 226 int get8(stbi *s) 227 { 228 if (s.img_buffer < s.img_buffer_end) 229 return *s.img_buffer++; 230 231 return 0; 232 } 233 234 int at_eof(stbi *s) 235 { 236 return s.img_buffer >= s.img_buffer_end; 237 } 238 239 ubyte get8u(stbi *s) 240 { 241 return cast(ubyte) get8(s); 242 } 243 244 void skip(stbi *s, int n) 245 { 246 s.img_buffer += n; 247 } 248 249 int getn(stbi *s, ubyte *buffer, int n) 250 { 251 if (s.img_buffer+n <= s.img_buffer_end) { 252 memcpy(buffer, s.img_buffer, n); 253 s.img_buffer += n; 254 return 1; 255 } else 256 return 0; 257 } 258 259 int get16(stbi *s) 260 { 261 int z = get8(s); 262 return (z << 8) + get8(s); 263 } 264 265 uint get32(stbi *s) 266 { 267 uint z = get16(s); 268 return (z << 16) + get16(s); 269 } 270 271 int get16le(stbi *s) 272 { 273 int z = get8(s); 274 return z + (get8(s) << 8); 275 } 276 277 uint get32le(stbi *s) 278 { 279 uint z = get16le(s); 280 return z + (get16le(s) << 16); 281 } 282 283 // 284 // generic converter from built-in img_n to req_comp 285 // individual types do this automatically as much as possible (e.g. jpeg 286 // does all cases internally since it needs to colorspace convert anyway, 287 // and it never has alpha, so very few cases ). png can automatically 288 // interleave an alpha=255 channel, but falls back to this for other cases 289 // 290 // assume data buffer is malloced, so malloc a new one and free that one 291 // only failure mode is malloc failing 292 293 ubyte compute_y(int r, int g, int b) 294 { 295 return cast(ubyte) (((r*77) + (g*150) + (29*b)) >> 8); 296 } 297 298 ubyte *convert_format(ubyte *data, int img_n, int req_comp, uint x, uint y) 299 { 300 int i,j; 301 ubyte *good; 302 303 if (req_comp == img_n) return data; 304 assert(req_comp >= 1 && req_comp <= 4); 305 306 good = cast(ubyte*) malloc(req_comp * x * y); 307 if (good == null) { 308 free(data); 309 throw new STBImageException("Out of memory"); 310 } 311 312 for (j=0; j < cast(int) y; ++j) { 313 ubyte *src = data + j * x * img_n ; 314 ubyte *dest = good + j * x * req_comp; 315 316 // convert source image with img_n components to one with req_comp components; 317 // avoid switch per pixel, so use switch per scanline and massive macros 318 switch (img_n * 8 + req_comp) 319 { 320 case 1 * 8 + 2: 321 for(i=x-1; i >= 0; --i, src += 1, dest += 2) 322 dest[0] = src[0], dest[1] = 255; 323 break; 324 case 1 * 8 + 3: 325 for(i=x-1; i >= 0; --i, src += 1, dest += 3) 326 dest[0]=dest[1]=dest[2]=src[0]; 327 break; 328 case 1 * 8 + 4: 329 for(i=x-1; i >= 0; --i, src += 1, dest += 4) 330 dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; 331 break; 332 case 2 * 8 + 1: 333 for(i=x-1; i >= 0; --i, src += 2, dest += 1) 334 dest[0]=src[0]; 335 break; 336 case 2 * 8 + 3: 337 for(i=x-1; i >= 0; --i, src += 2, dest += 3) 338 dest[0]=dest[1]=dest[2]=src[0]; 339 break; 340 case 2 * 8 + 4: 341 for(i=x-1; i >= 0; --i, src += 2, dest += 4) 342 dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; 343 break; 344 case 3 * 8 + 4: 345 for(i=x-1; i >= 0; --i, src += 3, dest += 4) 346 dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; 347 break; 348 case 3 * 8 + 1: 349 for(i=x-1; i >= 0; --i, src += 3, dest += 1) 350 dest[0]=compute_y(src[0],src[1],src[2]); 351 break; 352 case 3 * 8 + 2: 353 for(i=x-1; i >= 0; --i, src += 3, dest += 2) 354 dest[0]=compute_y(src[0],src[1],src[2]), dest[1] = 255; 355 break; 356 case 4 * 8 + 1: 357 for(i=x-1; i >= 0; --i, src += 4, dest += 1) 358 dest[0]=compute_y(src[0],src[1],src[2]); 359 break; 360 case 4 * 8 + 2: 361 for(i=x-1; i >= 0; --i, src += 4, dest += 2) 362 dest[0]=compute_y(src[0],src[1],src[2]), dest[1] = src[3]; 363 break; 364 case 4 * 8 + 3: 365 for(i=x-1; i >= 0; --i, src += 4, dest += 3) 366 dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; 367 break; 368 default: assert(0); 369 } 370 } 371 372 free(data); 373 return good; 374 } 375 376 // 377 // "baseline" JPEG/JFIF decoder (not actually fully baseline implementation) 378 // 379 // simple implementation 380 // - channel subsampling of at most 2 in each dimension 381 // - doesn't support delayed output of y-dimension 382 // - simple interface (only one output format: 8-bit interleaved RGB) 383 // - doesn't try to recover corrupt jpegs 384 // - doesn't allow partial loading, loading multiple at once 385 // - still fast on x86 (copying globals into locals doesn't help x86) 386 // - allocates lots of intermediate memory (full size of all components) 387 // - non-interleaved case requires this anyway 388 // - allows good upsampling (see next) 389 // high-quality 390 // - upsampled channels are bilinearly interpolated, even across blocks 391 // - quality integer IDCT derived from IJG's 'slow' 392 // performance 393 // - fast huffman; reasonable integer IDCT 394 // - uses a lot of intermediate memory, could cache poorly 395 // - load http://nothings.org/remote/anemones.jpg 3 times on 2.8Ghz P4 396 // stb_jpeg: 1.34 seconds (MSVC6, default release build) 397 // stb_jpeg: 1.06 seconds (MSVC6, processor = Pentium Pro) 398 // IJL11.dll: 1.08 seconds (compiled by intel) 399 // IJG 1998: 0.98 seconds (MSVC6, makefile provided by IJG) 400 // IJG 1998: 0.95 seconds (MSVC6, makefile + proc=PPro) 401 402 // huffman decoding acceleration 403 enum FAST_BITS = 9; // larger handles more cases; smaller stomps less cache 404 405 struct huffman 406 { 407 ubyte[1 << FAST_BITS] fast; 408 // weirdly, repacking this into AoS is a 10% speed loss, instead of a win 409 ushort[256] code; 410 ubyte[256] values; 411 ubyte[257] size; 412 uint[18] maxcode; 413 int[17] delta; // old 'firstsymbol' - old 'firstcode' 414 } 415 416 struct jpeg 417 { 418 stbi *s; 419 huffman[4] huff_dc; 420 huffman[4] huff_ac; 421 ubyte[64][4] dequant; 422 423 // sizes for components, interleaved MCUs 424 int img_h_max, img_v_max; 425 int img_mcu_x, img_mcu_y; 426 int img_mcu_w, img_mcu_h; 427 428 // definition of jpeg image component 429 struct img_comp_ 430 { 431 int id; 432 int h,v; 433 int tq; 434 int hd,ha; 435 int dc_pred; 436 437 int x,y,w2,h2; 438 ubyte *data; 439 void *raw_data; 440 ubyte *linebuf; 441 } 442 443 img_comp_[4] img_comp; 444 445 uint code_buffer; // jpeg entropy-coded buffer 446 int code_bits; // number of valid bits 447 ubyte marker; // marker seen while filling entropy buffer 448 int nomore; // flag if we saw a marker so must stop 449 450 int scan_n; 451 int[4] order; 452 int restart_interval, todo; 453 } 454 455 456 int build_huffman(huffman *h, int *count) 457 { 458 int i,j,k=0,code; 459 // build size list for each symbol (from JPEG spec) 460 for (i=0; i < 16; ++i) 461 for (j=0; j < count[i]; ++j) 462 h.size[k++] = cast(ubyte) (i+1); 463 h.size[k] = 0; 464 465 // compute actual symbols (from jpeg spec) 466 code = 0; 467 k = 0; 468 for(j=1; j <= 16; ++j) { 469 // compute delta to add to code to compute symbol id 470 h.delta[j] = k - code; 471 if (h.size[k] == j) { 472 while (h.size[k] == j) 473 h.code[k++] = cast(ushort) (code++); 474 if (code-1 >= (1 << j)) 475 throw new STBImageException("Bad code lengths, corrupt JPEG"); 476 } 477 // compute largest code + 1 for this size, preshifted as needed later 478 h.maxcode[j] = code << (16-j); 479 code <<= 1; 480 } 481 h.maxcode[j] = 0xffffffff; 482 483 // build non-spec acceleration table; 255 is flag for not-accelerated 484 memset(h.fast.ptr, 255, 1 << FAST_BITS); 485 for (i=0; i < k; ++i) { 486 int s = h.size[i]; 487 if (s <= FAST_BITS) { 488 int c = h.code[i] << (FAST_BITS-s); 489 int m = 1 << (FAST_BITS-s); 490 for (j=0; j < m; ++j) { 491 h.fast[c+j] = cast(ubyte) i; 492 } 493 } 494 } 495 return 1; 496 } 497 498 void grow_buffer_unsafe(jpeg *j) 499 { 500 do { 501 int b = j.nomore ? 0 : get8(j.s); 502 if (b == 0xff) { 503 int c = get8(j.s); 504 if (c != 0) { 505 j.marker = cast(ubyte) c; 506 j.nomore = 1; 507 return; 508 } 509 } 510 j.code_buffer |= b << (24 - j.code_bits); 511 j.code_bits += 8; 512 } while (j.code_bits <= 24); 513 } 514 515 // (1 << n) - 1 516 static immutable uint[17] bmask=[0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535]; 517 518 // decode a jpeg huffman value from the bitstream 519 int decode(jpeg *j, huffman *h) 520 { 521 uint temp; 522 int c,k; 523 524 if (j.code_bits < 16) grow_buffer_unsafe(j); 525 526 // look at the top FAST_BITS and determine what symbol ID it is, 527 // if the code is <= FAST_BITS 528 c = (j.code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); 529 k = h.fast[c]; 530 if (k < 255) { 531 int s = h.size[k]; 532 if (s > j.code_bits) 533 return -1; 534 j.code_buffer <<= s; 535 j.code_bits -= s; 536 return h.values[k]; 537 } 538 539 // naive test is to shift the code_buffer down so k bits are 540 // valid, then test against maxcode. To speed this up, we've 541 // preshifted maxcode left so that it has (16-k) 0s at the 542 // end; in other words, regardless of the number of bits, it 543 // wants to be compared against something shifted to have 16; 544 // that way we don't need to shift inside the loop. 545 temp = j.code_buffer >> 16; 546 for (k=FAST_BITS+1 ; ; ++k) 547 if (temp < h.maxcode[k]) 548 break; 549 if (k == 17) { 550 // error! code not found 551 j.code_bits -= 16; 552 return -1; 553 } 554 555 if (k > j.code_bits) 556 return -1; 557 558 // convert the huffman code to the symbol id 559 c = ((j.code_buffer >> (32 - k)) & bmask[k]) + h.delta[k]; 560 assert((((j.code_buffer) >> (32 - h.size[c])) & bmask[h.size[c]]) == h.code[c]); 561 562 // convert the id to a symbol 563 j.code_bits -= k; 564 j.code_buffer <<= k; 565 return h.values[c]; 566 } 567 568 // combined JPEG 'receive' and JPEG 'extend', since baseline 569 // always extends everything it receives. 570 int extend_receive(jpeg *j, int n) 571 { 572 uint m = 1 << (n-1); 573 uint k; 574 if (j.code_bits < n) grow_buffer_unsafe(j); 575 576 k = stbi_lrot(j.code_buffer, n); 577 j.code_buffer = k & ~bmask[n]; 578 k &= bmask[n]; 579 j.code_bits -= n; 580 581 // the following test is probably a random branch that won't 582 // predict well. I tried to table accelerate it but failed. 583 // maybe it's compiling as a conditional move? 584 if (k < m) 585 return (-1 << n) + k + 1; 586 else 587 return k; 588 } 589 590 // given a value that's at position X in the zigzag stream, 591 // where does it appear in the 8x8 matrix coded as row-major? 592 static immutable ubyte[64+15] dezigzag = 593 [ 594 0, 1, 8, 16, 9, 2, 3, 10, 595 17, 24, 32, 25, 18, 11, 4, 5, 596 12, 19, 26, 33, 40, 48, 41, 34, 597 27, 20, 13, 6, 7, 14, 21, 28, 598 35, 42, 49, 56, 57, 50, 43, 36, 599 29, 22, 15, 23, 30, 37, 44, 51, 600 58, 59, 52, 45, 38, 31, 39, 46, 601 53, 60, 61, 54, 47, 55, 62, 63, 602 // let corrupt input sample past end 603 63, 63, 63, 63, 63, 63, 63, 63, 604 63, 63, 63, 63, 63, 63, 63 605 ]; 606 607 // decode one 64-entry block-- 608 int decode_block(jpeg *j, short[64] data, huffman *hdc, huffman *hac, int b) 609 { 610 int diff,dc,k; 611 int t = decode(j, hdc); 612 if (t < 0) 613 throw new STBImageException("Bad huffman code, corrupt JPEG"); 614 615 // 0 all the ac values now so we can do it 32-bits at a time 616 memset(data.ptr,0,64*(data[0]).sizeof); 617 618 diff = t ? extend_receive(j, t) : 0; 619 dc = j.img_comp[b].dc_pred + diff; 620 j.img_comp[b].dc_pred = dc; 621 data[0] = cast(short) dc; 622 623 // decode AC components, see JPEG spec 624 k = 1; 625 do { 626 int r,s; 627 int rs = decode(j, hac); 628 if (rs < 0) 629 throw new STBImageException("Bad huffman code, corrupt JPEG"); 630 s = rs & 15; 631 r = rs >> 4; 632 if (s == 0) { 633 if (rs != 0xf0) break; // end block 634 k += 16; 635 } else { 636 k += r; 637 // decode into unzigzag'd location 638 data[dezigzag[k++]] = cast(short) extend_receive(j,s); 639 } 640 } while (k < 64); 641 return 1; 642 } 643 644 // take a -128..127 value and clamp it and convert to 0..255 645 ubyte clamp(int x) 646 { 647 // trick to use a single test to catch both cases 648 if (cast(uint) x > 255) { 649 if (x < 0) return 0; 650 if (x > 255) return 255; 651 } 652 return cast(ubyte) x; 653 } 654 655 int f2f(double x) 656 { 657 return cast(int)(x * 4096 + 0.5); 658 } 659 660 int fsh(int x) 661 { 662 return x << 12; 663 } 664 665 // derived from jidctint -- DCT_ISLOW 666 void IDCT_1D(int s0, int s1, int s2, int s3, int s4, int s5, int s6, int s7, 667 out int t0, out int t1, out int t2, out int t3, 668 out int x0, out int x1, out int x2, out int x3) 669 { 670 int p1,p2,p3,p4,p5; 671 p2 = s2; 672 p3 = s6; 673 p1 = (p2+p3) * f2f(0.5411961f); 674 t2 = p1 + p3*f2f(-1.847759065f); 675 t3 = p1 + p2*f2f( 0.765366865f); 676 p2 = s0; 677 p3 = s4; 678 t0 = fsh(p2+p3); 679 t1 = fsh(p2-p3); 680 x0 = t0+t3; 681 x3 = t0-t3; 682 x1 = t1+t2; 683 x2 = t1-t2; 684 t0 = s7; 685 t1 = s5; 686 t2 = s3; 687 t3 = s1; 688 p3 = t0+t2; 689 p4 = t1+t3; 690 p1 = t0+t3; 691 p2 = t1+t2; 692 p5 = (p3+p4)*f2f( 1.175875602f); 693 t0 = t0*f2f( 0.298631336f); 694 t1 = t1*f2f( 2.053119869f); 695 t2 = t2*f2f( 3.072711026f); 696 t3 = t3*f2f( 1.501321110f); 697 p1 = p5 + p1*f2f(-0.899976223f); 698 p2 = p5 + p2*f2f(-2.562915447f); 699 p3 = p3*f2f(-1.961570560f); 700 p4 = p4*f2f(-0.390180644f); 701 t3 += p1+p4; 702 t2 += p2+p3; 703 t1 += p2+p4; 704 t0 += p1+p3; 705 } 706 707 alias stbi_dequantize_t = ubyte; 708 709 // .344 seconds on 3*anemones.jpg 710 void idct_block(ubyte *out_, int out_stride, short[64] data, stbi_dequantize_t *dequantize) 711 { 712 int i; 713 int[64] val; 714 int*v = val.ptr; 715 stbi_dequantize_t *dq = dequantize; 716 ubyte *o; 717 short *d = data.ptr; 718 719 // columns 720 for (i=0; i < 8; ++i,++d,++dq, ++v) { 721 // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing 722 if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 723 && d[40]==0 && d[48]==0 && d[56]==0) { 724 // no shortcut 0 seconds 725 // (1|2|3|4|5|6|7)==0 0 seconds 726 // all separate -0.047 seconds 727 // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds 728 int dcterm = d[0] * dq[0] << 2; 729 v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; 730 } else { 731 int t0, t1, t2, t3, x0, x1, x2, x3; 732 IDCT_1D(d[ 0]*dq[ 0],d[ 8]*dq[ 8],d[16]*dq[16],d[24]*dq[24], 733 d[32]*dq[32],d[40]*dq[40],d[48]*dq[48],d[56]*dq[56], 734 t0, t1, t2, t3, x0, x1, x2, x3); 735 // constants scaled things up by 1<<12; let's bring them back 736 // down, but keep 2 extra bits of precision 737 x0 += 512; x1 += 512; x2 += 512; x3 += 512; 738 v[ 0] = (x0+t3) >> 10; 739 v[56] = (x0-t3) >> 10; 740 v[ 8] = (x1+t2) >> 10; 741 v[48] = (x1-t2) >> 10; 742 v[16] = (x2+t1) >> 10; 743 v[40] = (x2-t1) >> 10; 744 v[24] = (x3+t0) >> 10; 745 v[32] = (x3-t0) >> 10; 746 } 747 } 748 749 for (i=0, v=val.ptr, o=out_; i < 8; ++i,v+=8,o+=out_stride) { 750 751 // no fast case since the first 1D IDCT spread components out 752 int t0, t1, t2, t3, x0, x1, x2, x3; 753 IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7], t0, t1, t2, t3, x0, x1, x2, x3); 754 // constants scaled things up by 1<<12, plus we had 1<<2 from first 755 // loop, plus horizontal and vertical each scale by sqrt(8) so together 756 // we've got an extra 1<<3, so 1<<17 total we need to remove. 757 // so we want to round that, which means adding 0.5 * 1<<17, 758 // aka 65536. Also, we'll end up with -128 to 127 that we want 759 // to encode as 0..255 by adding 128, so we'll add that before the shift 760 x0 += 65536 + (128<<17); 761 x1 += 65536 + (128<<17); 762 x2 += 65536 + (128<<17); 763 x3 += 65536 + (128<<17); 764 // tried computing the shifts into temps, or'ing the temps to see 765 // if any were out of range, but that was slower 766 o[0] = clamp((x0+t3) >> 17); 767 o[7] = clamp((x0-t3) >> 17); 768 o[1] = clamp((x1+t2) >> 17); 769 o[6] = clamp((x1-t2) >> 17); 770 o[2] = clamp((x2+t1) >> 17); 771 o[5] = clamp((x2-t1) >> 17); 772 o[3] = clamp((x3+t0) >> 17); 773 o[4] = clamp((x3-t0) >> 17); 774 } 775 } 776 777 778 enum MARKER_none = 0xff; 779 780 // if there's a pending marker from the entropy stream, return that 781 // otherwise, fetch from the stream and get a marker. if there's no 782 // marker, return 0xff, which is never a valid marker value 783 ubyte get_marker(jpeg *j) 784 { 785 ubyte x; 786 if (j.marker != MARKER_none) { x = j.marker; j.marker = MARKER_none; return x; } 787 x = get8u(j.s); 788 if (x != 0xff) return MARKER_none; 789 while (x == 0xff) 790 x = get8u(j.s); 791 return x; 792 } 793 794 // in each scan, we'll have scan_n components, and the order 795 // of the components is specified by order[] 796 bool RESTART(int x) 797 { 798 return (x >= 0xd0) && (x <= 0xd7); 799 } 800 801 // after a restart interval, reset the entropy decoder and 802 // the dc prediction 803 void reset(jpeg *j) 804 { 805 j.code_bits = 0; 806 j.code_buffer = 0; 807 j.nomore = 0; 808 j.img_comp[0].dc_pred = j.img_comp[1].dc_pred = j.img_comp[2].dc_pred = 0; 809 j.marker = MARKER_none; 810 j.todo = j.restart_interval ? j.restart_interval : 0x7fffffff; 811 // no more than 1<<31 MCUs if no restart_interal? that's plenty safe, 812 // since we don't even allow 1<<30 pixels 813 } 814 815 int parse_entropy_coded_data(jpeg *z) 816 { 817 reset(z); 818 if (z.scan_n == 1) { 819 int i,j; 820 short[64] data; 821 int n = z.order[0]; 822 // non-interleaved data, we just need to process one block at a time, 823 // in trivial scanline order 824 // number of blocks to do just depends on how many actual "pixels" this 825 // component has, independent of interleaved MCU blocking and such 826 int w = (z.img_comp[n].x+7) >> 3; 827 int h = (z.img_comp[n].y+7) >> 3; 828 for (j=0; j < h; ++j) { 829 for (i=0; i < w; ++i) { 830 if (!decode_block(z, data, z.huff_dc.ptr+z.img_comp[n].hd, z.huff_ac.ptr+z.img_comp[n].ha, n)) return 0; 831 idct_block(z.img_comp[n].data+z.img_comp[n].w2*j*8+i*8, z.img_comp[n].w2, data, z.dequant[z.img_comp[n].tq].ptr); 832 // every data block is an MCU, so countdown the restart interval 833 if (--z.todo <= 0) { 834 if (z.code_bits < 24) grow_buffer_unsafe(z); 835 // if it's NOT a restart, then just bail, so we get corrupt data 836 // rather than no data 837 if (!RESTART(z.marker)) return 1; 838 reset(z); 839 } 840 } 841 } 842 } else { // interleaved! 843 int i,j,k,x,y; 844 short[64] data; 845 for (j=0; j < z.img_mcu_y; ++j) { 846 for (i=0; i < z.img_mcu_x; ++i) { 847 // scan an interleaved mcu... process scan_n components in order 848 for (k=0; k < z.scan_n; ++k) { 849 int n = z.order[k]; 850 // scan out an mcu's worth of this component; that's just determined 851 // by the basic H and V specified for the component 852 for (y=0; y < z.img_comp[n].v; ++y) { 853 for (x=0; x < z.img_comp[n].h; ++x) { 854 int x2 = (i*z.img_comp[n].h + x)*8; 855 int y2 = (j*z.img_comp[n].v + y)*8; 856 if (!decode_block(z, data, z.huff_dc.ptr+z.img_comp[n].hd, z.huff_ac.ptr+z.img_comp[n].ha, n)) return 0; 857 idct_block(z.img_comp[n].data+z.img_comp[n].w2*y2+x2, z.img_comp[n].w2, data, z.dequant[z.img_comp[n].tq].ptr); 858 } 859 } 860 } 861 // after all interleaved components, that's an interleaved MCU, 862 // so now count down the restart interval 863 if (--z.todo <= 0) { 864 if (z.code_bits < 24) grow_buffer_unsafe(z); 865 // if it's NOT a restart, then just bail, so we get corrupt data 866 // rather than no data 867 if (!RESTART(z.marker)) return 1; 868 reset(z); 869 } 870 } 871 } 872 } 873 return 1; 874 } 875 876 int process_marker(jpeg *z, int m) 877 { 878 int L; 879 switch (m) { 880 881 case MARKER_none: // no marker found 882 throw new STBImageException("Expected marker, corrupt JPEG"); 883 884 case 0xC2: // SOF - progressive 885 throw new STBImageException("JPEG format not supported (progressive)"); 886 887 case 0xDD: // DRI - specify restart interval 888 if (get16(z.s) != 4) 889 throw new STBImageException("Bad DRI len, corrupt JPEG"); 890 z.restart_interval = get16(z.s); 891 return 1; 892 893 case 0xDB: // DQT - define quantization table 894 L = get16(z.s)-2; 895 while (L > 0) { 896 int q = get8(z.s); 897 int p = q >> 4; 898 int t = q & 15,i; 899 if (p != 0) 900 throw new STBImageException("Bad DQT type, corrupt JPEG"); 901 if (t > 3) 902 throw new STBImageException("Bad DQT table, corrupt JPEG"); 903 for (i=0; i < 64; ++i) 904 z.dequant[t][dezigzag[i]] = get8u(z.s); 905 L -= 65; 906 } 907 return L==0; 908 909 case 0xC4: // DHT - define huffman table 910 L = get16(z.s)-2; 911 while (L > 0) { 912 ubyte *v; 913 int[16] sizes; 914 int i; 915 int m_ = 0; 916 int q = get8(z.s); 917 int tc = q >> 4; 918 int th = q & 15; 919 if (tc > 1 || th > 3) 920 throw new STBImageException("Bad DHT header, corrupt JPEG"); 921 for (i=0; i < 16; ++i) { 922 sizes[i] = get8(z.s); 923 m_ += sizes[i]; 924 } 925 L -= 17; 926 if (tc == 0) { 927 if (!build_huffman(z.huff_dc.ptr+th, sizes.ptr)) return 0; 928 v = z.huff_dc[th].values.ptr; 929 } else { 930 if (!build_huffman(z.huff_ac.ptr+th, sizes.ptr)) return 0; 931 v = z.huff_ac[th].values.ptr; 932 } 933 for (i=0; i < m_; ++i) 934 v[i] = get8u(z.s); 935 L -= m_; 936 } 937 return L==0; 938 939 default: 940 break; 941 } 942 // check for comment block or APP blocks 943 if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) { 944 skip(z.s, get16(z.s)-2); 945 return 1; 946 } 947 return 0; 948 } 949 950 // after we see SOS 951 int process_scan_header(jpeg *z) 952 { 953 int i; 954 int Ls = get16(z.s); 955 z.scan_n = get8(z.s); 956 if (z.scan_n < 1 || z.scan_n > 4 || z.scan_n > cast(int) z.s.img_n) 957 throw new STBImageException("Bad SOS component count, Corrupt JPEG"); 958 959 if (Ls != 6+2*z.scan_n) 960 throw new STBImageException("Bad SOS length, Corrupt JPEG"); 961 962 for (i=0; i < z.scan_n; ++i) { 963 int id = get8(z.s), which; 964 int q = get8(z.s); 965 for (which = 0; which < z.s.img_n; ++which) 966 if (z.img_comp[which].id == id) 967 break; 968 if (which == z.s.img_n) return 0; 969 z.img_comp[which].hd = q >> 4; 970 if (z.img_comp[which].hd > 3) 971 throw new STBImageException("Bad DC huff, Corrupt JPEG"); 972 z.img_comp[which].ha = q & 15; 973 if (z.img_comp[which].ha > 3) 974 throw new STBImageException("Bad AC huff, Corrupt JPEG"); 975 z.order[i] = which; 976 } 977 if (get8(z.s) != 0) 978 throw new STBImageException("Bad SOS, Corrupt JPEG"); 979 get8(z.s); // should be 63, but might be 0 980 if (get8(z.s) != 0) 981 throw new STBImageException("Bad SOS, Corrupt JPEG"); 982 983 return 1; 984 } 985 986 int process_frame_header(jpeg *z, int scan) 987 { 988 stbi *s = z.s; 989 int Lf,p,i,q, h_max=1,v_max=1,c; 990 Lf = get16(s); if (Lf < 11) throw new STBImageException("Bad SOF len, Corrupt JPEG"); 991 p = get8(s); if (p != 8) throw new STBImageException("JPEG format not supported: 8-bit only"); // JPEG baseline 992 s.img_y = get16(s); if (s.img_y == 0) throw new STBImageException("No header height, JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG 993 s.img_x = get16(s); if (s.img_x == 0) throw new STBImageException("0 width, corrupt JPEG"); // JPEG requires 994 c = get8(s); 995 if (c != 3 && c != 1) throw new STBImageException("Bad component count, corrupt JPEG"); // JFIF requires 996 s.img_n = c; 997 for (i=0; i < c; ++i) { 998 z.img_comp[i].data = null; 999 z.img_comp[i].linebuf = null; 1000 } 1001 1002 if (Lf != 8+3*s.img_n) throw new STBImageException("Bad SOF len, corrupt JPEG"); 1003 1004 for (i=0; i < s.img_n; ++i) { 1005 z.img_comp[i].id = get8(s); 1006 if (z.img_comp[i].id != i+1) // JFIF requires 1007 if (z.img_comp[i].id != i) // some version of jpegtran outputs non-JFIF-compliant files! 1008 throw new STBImageException("Bad component ID, corrupt JPEG"); 1009 q = get8(s); 1010 z.img_comp[i].h = (q >> 4); if (!z.img_comp[i].h || z.img_comp[i].h > 4) throw new STBImageException("Bad H, corrupt JPEG"); 1011 z.img_comp[i].v = q & 15; if (!z.img_comp[i].v || z.img_comp[i].v > 4) throw new STBImageException("Bad V, corrupt JPEG"); 1012 z.img_comp[i].tq = get8(s); if (z.img_comp[i].tq > 3) throw new STBImageException("Bad TQ, corrupt JPEG"); 1013 } 1014 1015 if (scan != SCAN_load) return 1; 1016 1017 if ((1 << 30) / s.img_x / s.img_n < s.img_y) throw new STBImageException("Image too large to decode"); 1018 1019 for (i=0; i < s.img_n; ++i) { 1020 if (z.img_comp[i].h > h_max) h_max = z.img_comp[i].h; 1021 if (z.img_comp[i].v > v_max) v_max = z.img_comp[i].v; 1022 } 1023 1024 // compute interleaved mcu info 1025 z.img_h_max = h_max; 1026 z.img_v_max = v_max; 1027 z.img_mcu_w = h_max * 8; 1028 z.img_mcu_h = v_max * 8; 1029 z.img_mcu_x = (s.img_x + z.img_mcu_w-1) / z.img_mcu_w; 1030 z.img_mcu_y = (s.img_y + z.img_mcu_h-1) / z.img_mcu_h; 1031 1032 for (i=0; i < s.img_n; ++i) { 1033 // number of effective pixels (e.g. for non-interleaved MCU) 1034 z.img_comp[i].x = (s.img_x * z.img_comp[i].h + h_max-1) / h_max; 1035 z.img_comp[i].y = (s.img_y * z.img_comp[i].v + v_max-1) / v_max; 1036 // to simplify generation, we'll allocate enough memory to decode 1037 // the bogus oversized data from using interleaved MCUs and their 1038 // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't 1039 // discard the extra data until colorspace conversion 1040 z.img_comp[i].w2 = z.img_mcu_x * z.img_comp[i].h * 8; 1041 z.img_comp[i].h2 = z.img_mcu_y * z.img_comp[i].v * 8; 1042 z.img_comp[i].raw_data = malloc(z.img_comp[i].w2 * z.img_comp[i].h2+15); 1043 if (z.img_comp[i].raw_data == null) { 1044 for(--i; i >= 0; --i) { 1045 free(z.img_comp[i].raw_data); 1046 z.img_comp[i].data = null; 1047 } 1048 throw new STBImageException("Out of memory"); 1049 } 1050 // align blocks for installable-idct using mmx/sse 1051 z.img_comp[i].data = cast(ubyte*) (( cast(size_t) z.img_comp[i].raw_data + 15) & ~15); 1052 z.img_comp[i].linebuf = null; 1053 } 1054 1055 return 1; 1056 } 1057 1058 // use comparisons since in some cases we handle more than one case (e.g. SOF) 1059 bool DNL(int x) { return x == 0xdc; } 1060 bool SOI(int x) { return x == 0xd8; } 1061 bool EOI(int x) { return x == 0xd9; } 1062 bool SOF(int x) { return x == 0xc0 || x == 0xc1; } 1063 bool SOS(int x) { return x == 0xda; } 1064 1065 int decode_jpeg_header(jpeg *z, int scan) 1066 { 1067 int m; 1068 z.marker = MARKER_none; // initialize cached marker to empty 1069 m = get_marker(z); 1070 if (!SOI(m)) throw new STBImageException("No SOI, corrupt JPEG"); 1071 if (scan == SCAN_type) return 1; 1072 m = get_marker(z); 1073 while (!SOF(m)) 1074 { 1075 1076 if (!process_marker(z,m)) return 0; 1077 m = get_marker(z); 1078 1079 1080 1081 while (m == MARKER_none) 1082 { 1083 // some files have extra padding after their blocks, so ok, we'll scan 1084 if (at_eof(z.s)) throw new STBImageException("No SOF, corrupt JPEG"); 1085 m = get_marker(z); 1086 } 1087 } 1088 if (!process_frame_header(z, scan)) return 0; 1089 return 1; 1090 } 1091 1092 int decode_jpeg_image(jpeg *j) 1093 { 1094 int m; 1095 j.restart_interval = 0; 1096 if (!decode_jpeg_header(j, SCAN_load)) return 0; 1097 m = get_marker(j); 1098 while (!EOI(m)) { 1099 if (SOS(m)) { 1100 if (!process_scan_header(j)) return 0; 1101 if (!parse_entropy_coded_data(j)) return 0; 1102 if (j.marker == MARKER_none ) { 1103 // handle 0s at the end of image data from IP Kamera 9060 1104 while (!at_eof(j.s)) { 1105 int x = get8(j.s); 1106 if (x == 255) { 1107 j.marker = get8u(j.s); 1108 break; 1109 } else if (x != 0) { 1110 return 0; 1111 } 1112 } 1113 // if we reach eof without hitting a marker, get_marker() below will fail and we'll eventually return 0 1114 } 1115 } else { 1116 if (!process_marker(j, m)) return 0; 1117 } 1118 m = get_marker(j); 1119 } 1120 return 1; 1121 } 1122 1123 // static jfif-centered resampling (across block boundaries) 1124 1125 alias resample_row_func = ubyte* function(ubyte *out_, ubyte *in0, ubyte *in1, int w, int hs); 1126 1127 ubyte div4(int x) 1128 { 1129 return cast(ubyte)(x >> 2); 1130 } 1131 1132 ubyte *resample_row_1(ubyte *out_, ubyte *in_near, ubyte *in_far, int w, int hs) 1133 { 1134 return in_near; 1135 } 1136 1137 ubyte* resample_row_v_2(ubyte *out_, ubyte *in_near, ubyte *in_far, int w, int hs) 1138 { 1139 // need to generate two samples vertically for every one in input 1140 int i; 1141 for (i=0; i < w; ++i) 1142 out_[i] = div4(3*in_near[i] + in_far[i] + 2); 1143 return out_; 1144 } 1145 1146 ubyte* resample_row_h_2(ubyte *out_, ubyte *in_near, ubyte *in_far, int w, int hs) 1147 { 1148 // need to generate two samples horizontally for every one in input 1149 int i; 1150 ubyte *input = in_near; 1151 1152 if (w == 1) { 1153 // if only one sample, can't do any interpolation 1154 out_[0] = out_[1] = input[0]; 1155 return out_; 1156 } 1157 1158 out_[0] = input[0]; 1159 out_[1] = div4(input[0]*3 + input[1] + 2); 1160 for (i=1; i < w-1; ++i) { 1161 int n = 3*input[i]+2; 1162 out_[i*2+0] = div4(n+input[i-1]); 1163 out_[i*2+1] = div4(n+input[i+1]); 1164 } 1165 out_[i*2+0] = div4(input[w-2]*3 + input[w-1] + 2); 1166 out_[i*2+1] = input[w-1]; 1167 1168 return out_; 1169 } 1170 1171 ubyte div16(int x) 1172 { 1173 return cast(ubyte)(x >> 4); 1174 } 1175 1176 1177 ubyte *resample_row_hv_2(ubyte *out_, ubyte *in_near, ubyte *in_far, int w, int hs) 1178 { 1179 // need to generate 2x2 samples for every one in input 1180 int i,t0,t1; 1181 if (w == 1) { 1182 out_[0] = out_[1] = div4(3*in_near[0] + in_far[0] + 2); 1183 return out_; 1184 } 1185 1186 t1 = 3*in_near[0] + in_far[0]; 1187 out_[0] = div4(t1+2); 1188 for (i=1; i < w; ++i) { 1189 t0 = t1; 1190 t1 = 3*in_near[i]+in_far[i]; 1191 out_[i*2-1] = div16(3*t0 + t1 + 8); 1192 out_[i*2 ] = div16(3*t1 + t0 + 8); 1193 } 1194 out_[w*2-1] = div4(t1+2); 1195 1196 return out_; 1197 } 1198 1199 ubyte *resample_row_generic(ubyte *out_, ubyte *in_near, ubyte *in_far, int w, int hs) 1200 { 1201 // resample with nearest-neighbor 1202 int i,j; 1203 in_far = in_far; 1204 for (i=0; i < w; ++i) 1205 for (j=0; j < hs; ++j) 1206 out_[i*hs+j] = in_near[i]; 1207 return out_; 1208 } 1209 1210 int float2fixed(double x) 1211 { 1212 return cast(int)((x) * 65536 + 0.5); 1213 } 1214 1215 // 0.38 seconds on 3*anemones.jpg (0.25 with processor = Pro) 1216 // VC6 without processor=Pro is generating multiple LEAs per multiply! 1217 void YCbCr_to_RGB_row(ubyte *out_, const ubyte *y, const ubyte *pcb, const ubyte *pcr, int count, int step) 1218 { 1219 int i; 1220 for (i=0; i < count; ++i) { 1221 int y_fixed = (y[i] << 16) + 32768; // rounding 1222 int r,g,b; 1223 int cr = pcr[i] - 128; 1224 int cb = pcb[i] - 128; 1225 r = y_fixed + cr*float2fixed(1.40200f); 1226 g = y_fixed - cr*float2fixed(0.71414f) - cb*float2fixed(0.34414f); 1227 b = y_fixed + cb*float2fixed(1.77200f); 1228 r >>= 16; 1229 g >>= 16; 1230 b >>= 16; 1231 if (cast(uint) r > 255) { if (r < 0) r = 0; else r = 255; } 1232 if (cast(uint) g > 255) { if (g < 0) g = 0; else g = 255; } 1233 if (cast(uint) b > 255) { if (b < 0) b = 0; else b = 255; } 1234 out_[0] = cast(ubyte)r; 1235 out_[1] = cast(ubyte)g; 1236 out_[2] = cast(ubyte)b; 1237 out_[3] = 255; 1238 out_ += step; 1239 } 1240 } 1241 1242 // clean up the temporary component buffers 1243 void cleanup_jpeg(jpeg *j) 1244 { 1245 int i; 1246 for (i=0; i < j.s.img_n; ++i) { 1247 if (j.img_comp[i].data) { 1248 free(j.img_comp[i].raw_data); 1249 j.img_comp[i].data = null; 1250 } 1251 if (j.img_comp[i].linebuf) { 1252 free(j.img_comp[i].linebuf); 1253 j.img_comp[i].linebuf = null; 1254 } 1255 } 1256 } 1257 1258 struct stbi_resample 1259 { 1260 resample_row_func resample; 1261 ubyte* line0; 1262 ubyte* line1; 1263 int hs,vs; // expansion factor in each axis 1264 int w_lores; // horizontal pixels pre-expansion 1265 int ystep; // how far through vertical expansion we are 1266 int ypos; // which pre-expansion row we're on 1267 } ; 1268 1269 ubyte *load_jpeg_image(jpeg *z, int *out_x, int *out_y, int *comp, int req_comp) 1270 { 1271 int n, decode_n; 1272 // validate req_comp 1273 if (req_comp < 0 || req_comp > 4) 1274 throw new STBImageException("Internal error: bad req_comp"); 1275 z.s.img_n = 0; 1276 1277 // load a jpeg image from whichever source 1278 if (!decode_jpeg_image(z)) { cleanup_jpeg(z); return null; } 1279 1280 // determine actual number of components to generate 1281 n = req_comp ? req_comp : z.s.img_n; 1282 1283 if (z.s.img_n == 3 && n < 3) 1284 decode_n = 1; 1285 else 1286 decode_n = z.s.img_n; 1287 1288 // resample and color-convert 1289 { 1290 int k; 1291 uint i,j; 1292 ubyte *output; 1293 ubyte*[4] coutput; 1294 1295 stbi_resample[4] res_comp; 1296 1297 for (k=0; k < decode_n; ++k) { 1298 stbi_resample *r = &res_comp[k]; 1299 1300 // allocate line buffer big enough for upsampling off the edges 1301 // with upsample factor of 4 1302 z.img_comp[k].linebuf = cast(ubyte*) malloc(z.s.img_x + 3); 1303 if (!z.img_comp[k].linebuf) 1304 { 1305 cleanup_jpeg(z); 1306 throw new STBImageException("Out of memory"); 1307 } 1308 1309 r.hs = z.img_h_max / z.img_comp[k].h; 1310 r.vs = z.img_v_max / z.img_comp[k].v; 1311 r.ystep = r.vs >> 1; 1312 r.w_lores = (z.s.img_x + r.hs-1) / r.hs; 1313 r.ypos = 0; 1314 r.line0 = r.line1 = z.img_comp[k].data; 1315 1316 if (r.hs == 1 && r.vs == 1) r.resample = &resample_row_1; 1317 else if (r.hs == 1 && r.vs == 2) r.resample = &resample_row_v_2; 1318 else if (r.hs == 2 && r.vs == 1) r.resample = &resample_row_h_2; 1319 else if (r.hs == 2 && r.vs == 2) r.resample = &resample_row_hv_2; 1320 else r.resample = &resample_row_generic; 1321 } 1322 1323 // can't error after this so, this is safe 1324 output = cast(ubyte*) malloc(n * z.s.img_x * z.s.img_y + 1); 1325 if (!output) { cleanup_jpeg(z); throw new STBImageException("Out of memory"); } 1326 1327 // now go ahead and resample 1328 for (j=0; j < z.s.img_y; ++j) { 1329 ubyte *out_ = output + n * z.s.img_x * j; 1330 for (k=0; k < decode_n; ++k) { 1331 stbi_resample *r = &res_comp[k]; 1332 int y_bot = r.ystep >= (r.vs >> 1); 1333 coutput[k] = r.resample(z.img_comp[k].linebuf, 1334 y_bot ? r.line1 : r.line0, 1335 y_bot ? r.line0 : r.line1, 1336 r.w_lores, r.hs); 1337 if (++r.ystep >= r.vs) { 1338 r.ystep = 0; 1339 r.line0 = r.line1; 1340 if (++r.ypos < z.img_comp[k].y) 1341 r.line1 += z.img_comp[k].w2; 1342 } 1343 } 1344 if (n >= 3) { 1345 ubyte *y = coutput[0]; 1346 if (z.s.img_n == 3) { 1347 YCbCr_to_RGB_row(out_, y, coutput[1], coutput[2], z.s.img_x, n); 1348 } else 1349 for (i=0; i < z.s.img_x; ++i) { 1350 out_[0] = out_[1] = out_[2] = y[i]; 1351 out_[3] = 255; // not used if n==3 1352 out_ += n; 1353 } 1354 } else { 1355 ubyte *y = coutput[0]; 1356 if (n == 1) 1357 for (i=0; i < z.s.img_x; ++i) out_[i] = y[i]; 1358 else 1359 for (i=0; i < z.s.img_x; ++i) *out_++ = y[i], *out_++ = 255; 1360 } 1361 } 1362 cleanup_jpeg(z); 1363 *out_x = z.s.img_x; 1364 *out_y = z.s.img_y; 1365 if (comp) *comp = z.s.img_n; // report original components, not output 1366 return output; 1367 } 1368 } 1369 1370 ubyte* stbi_jpeg_load(stbi *s, int *x, int *y, int *comp, int req_comp) 1371 { 1372 jpeg j; 1373 j.s = s; 1374 return load_jpeg_image(&j, x,y,comp,req_comp); 1375 } 1376 1377 void stbi_jpeg_test(stbi *s) 1378 { 1379 jpeg j; 1380 j.s = s; 1381 int r = decode_jpeg_header(&j, SCAN_type); 1382 if (r == 0) 1383 throw new STBImageException("Couldn't decode JPEG header"); 1384 } 1385 1386 1387 // public domain zlib decode v0.2 Sean Barrett 2006-11-18 1388 // simple implementation 1389 // - all input must be provided in an upfront buffer 1390 // - all output is written to a single output buffer (can malloc/realloc) 1391 // performance 1392 // - fast huffman 1393 1394 // fast-way is faster to check than jpeg huffman, but slow way is slower 1395 enum ZFAST_BITS = 9; // accelerate all cases in default tables 1396 enum ZFAST_MASK = ((1 << ZFAST_BITS) - 1); 1397 1398 // zlib-style huffman encoding 1399 // (jpegs packs from left, zlib from right, so can't share code) 1400 struct zhuffman 1401 { 1402 ushort[1 << ZFAST_BITS] fast; 1403 ushort[16] firstcode; 1404 int[17] maxcode; 1405 ushort[16] firstsymbol; 1406 ubyte[288] size; 1407 ushort[288] value; 1408 } ; 1409 1410 int bitreverse16(int n) 1411 { 1412 n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1); 1413 n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2); 1414 n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4); 1415 n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8); 1416 return n; 1417 } 1418 1419 int bit_reverse(int v, int bits) 1420 { 1421 assert(bits <= 16); 1422 // to bit reverse n bits, reverse 16 and shift 1423 // e.g. 11 bits, bit reverse and shift away 5 1424 return bitreverse16(v) >> (16-bits); 1425 } 1426 1427 int zbuild_huffman(zhuffman *z, ubyte *sizelist, int num) 1428 { 1429 int i,k=0; 1430 int code; 1431 int[16] next_code; 1432 int[17] sizes; 1433 1434 // DEFLATE spec for generating codes 1435 memset(sizes.ptr, 0, sizes.sizeof); 1436 memset(z.fast.ptr, 255, z.fast.sizeof); 1437 for (i=0; i < num; ++i) 1438 ++sizes[sizelist[i]]; 1439 sizes[0] = 0; 1440 for (i=1; i < 16; ++i) 1441 assert(sizes[i] <= (1 << i)); 1442 code = 0; 1443 for (i=1; i < 16; ++i) { 1444 next_code[i] = code; 1445 z.firstcode[i] = cast(ushort) code; 1446 z.firstsymbol[i] = cast(ushort) k; 1447 code = (code + sizes[i]); 1448 if (sizes[i]) 1449 if (code-1 >= (1 << i)) 1450 throw new STBImageException("Bad codelength, corrupt JPEG"); 1451 z.maxcode[i] = code << (16-i); // preshift for inner loop 1452 code <<= 1; 1453 k += sizes[i]; 1454 } 1455 z.maxcode[16] = 0x10000; // sentinel 1456 for (i=0; i < num; ++i) { 1457 int s = sizelist[i]; 1458 if (s) { 1459 int c = next_code[s] - z.firstcode[s] + z.firstsymbol[s]; 1460 z.size[c] = cast(ubyte)s; 1461 z.value[c] = cast(ushort)i; 1462 if (s <= ZFAST_BITS) { 1463 int k_ = bit_reverse(next_code[s],s); 1464 while (k_ < (1 << ZFAST_BITS)) { 1465 z.fast[k_] = cast(ushort) c; 1466 k_ += (1 << s); 1467 } 1468 } 1469 ++next_code[s]; 1470 } 1471 } 1472 return 1; 1473 } 1474 1475 // zlib-from-memory implementation for PNG reading 1476 // because PNG allows splitting the zlib stream arbitrarily, 1477 // and it's annoying structurally to have PNG call ZLIB call PNG, 1478 // we require PNG read all the IDATs and combine them into a single 1479 // memory buffer 1480 1481 struct zbuf 1482 { 1483 const(ubyte) *zbuffer; 1484 const(ubyte) *zbuffer_end; 1485 int num_bits; 1486 uint code_buffer; 1487 1488 ubyte *zout; 1489 ubyte *zout_start; 1490 ubyte *zout_end; 1491 int z_expandable; 1492 1493 zhuffman z_length, z_distance; 1494 } ; 1495 1496 int zget8(zbuf *z) 1497 { 1498 if (z.zbuffer >= z.zbuffer_end) return 0; 1499 return *z.zbuffer++; 1500 } 1501 1502 void fill_bits(zbuf *z) 1503 { 1504 do { 1505 assert(z.code_buffer < (1U << z.num_bits)); 1506 z.code_buffer |= zget8(z) << z.num_bits; 1507 z.num_bits += 8; 1508 } while (z.num_bits <= 24); 1509 } 1510 1511 uint zreceive(zbuf *z, int n) 1512 { 1513 uint k; 1514 if (z.num_bits < n) fill_bits(z); 1515 k = z.code_buffer & ((1 << n) - 1); 1516 z.code_buffer >>= n; 1517 z.num_bits -= n; 1518 return k; 1519 } 1520 1521 int zhuffman_decode(zbuf *a, zhuffman *z) 1522 { 1523 int b,s,k; 1524 if (a.num_bits < 16) fill_bits(a); 1525 b = z.fast[a.code_buffer & ZFAST_MASK]; 1526 if (b < 0xffff) { 1527 s = z.size[b]; 1528 a.code_buffer >>= s; 1529 a.num_bits -= s; 1530 return z.value[b]; 1531 } 1532 1533 // not resolved by fast table, so compute it the slow way 1534 // use jpeg approach, which requires MSbits at top 1535 k = bit_reverse(a.code_buffer, 16); 1536 for (s=ZFAST_BITS+1; ; ++s) 1537 if (k < z.maxcode[s]) 1538 break; 1539 if (s == 16) return -1; // invalid code! 1540 // code size is s, so: 1541 b = (k >> (16-s)) - z.firstcode[s] + z.firstsymbol[s]; 1542 assert(z.size[b] == s); 1543 a.code_buffer >>= s; 1544 a.num_bits -= s; 1545 return z.value[b]; 1546 } 1547 1548 int expand(zbuf *z, int n) // need to make room for n bytes 1549 { 1550 ubyte *q; 1551 int cur, limit; 1552 if (!z.z_expandable) 1553 throw new STBImageException("Output buffer limit, corrupt PNG"); 1554 cur = cast(int) (z.zout - z.zout_start); 1555 limit = cast(int) (z.zout_end - z.zout_start); 1556 while (cur + n > limit) 1557 limit *= 2; 1558 q = cast(ubyte*) realloc(z.zout_start, limit); 1559 if (q == null) 1560 throw new STBImageException("Out of memory"); 1561 z.zout_start = q; 1562 z.zout = q + cur; 1563 z.zout_end = q + limit; 1564 return 1; 1565 } 1566 1567 static immutable int[31] length_base = [ 1568 3,4,5,6,7,8,9,10,11,13, 1569 15,17,19,23,27,31,35,43,51,59, 1570 67,83,99,115,131,163,195,227,258,0,0 ]; 1571 1572 static immutable int[31] length_extra = 1573 [ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 ]; 1574 1575 static immutable int[32] dist_base = [ 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, 1576 257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0]; 1577 1578 static immutable int[32] dist_extra = 1579 [ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13]; 1580 1581 int parse_huffman_block(zbuf *a) 1582 { 1583 for(;;) { 1584 int z = zhuffman_decode(a, &a.z_length); 1585 if (z < 256) { 1586 if (z < 0) 1587 throw new STBImageException("Bad Huffman code, corrupt PNG"); 1588 if (a.zout >= a.zout_end) if (!expand(a, 1)) return 0; 1589 *a.zout++ = cast(ubyte) z; 1590 } else { 1591 ubyte *p; 1592 int len,dist; 1593 if (z == 256) return 1; 1594 z -= 257; 1595 len = length_base[z]; 1596 if (length_extra[z]) len += zreceive(a, length_extra[z]); 1597 z = zhuffman_decode(a, &a.z_distance); 1598 if (z < 0) throw new STBImageException("Bad Huffman code, corrupt PNG"); 1599 dist = dist_base[z]; 1600 if (dist_extra[z]) dist += zreceive(a, dist_extra[z]); 1601 if (a.zout - a.zout_start < dist) throw new STBImageException("Bad dist, corrupt PNG"); 1602 if (a.zout + len > a.zout_end) if (!expand(a, len)) return 0; 1603 p = a.zout - dist; 1604 while (len--) 1605 *a.zout++ = *p++; 1606 } 1607 } 1608 } 1609 1610 int compute_huffman_codes(zbuf *a) 1611 { 1612 static immutable ubyte[19] length_dezigzag = [ 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 ]; 1613 zhuffman z_codelength; 1614 ubyte[286+32+137] lencodes;//padding for maximum single op 1615 ubyte[19] codelength_sizes; 1616 int i,n; 1617 1618 int hlit = zreceive(a,5) + 257; 1619 int hdist = zreceive(a,5) + 1; 1620 int hclen = zreceive(a,4) + 4; 1621 1622 memset(codelength_sizes.ptr, 0, codelength_sizes.sizeof); 1623 for (i=0; i < hclen; ++i) { 1624 int s = zreceive(a,3); 1625 codelength_sizes[length_dezigzag[i]] = cast(ubyte) s; 1626 } 1627 if (!zbuild_huffman(&z_codelength, codelength_sizes.ptr, 19)) return 0; 1628 1629 n = 0; 1630 while (n < hlit + hdist) { 1631 int c = zhuffman_decode(a, &z_codelength); 1632 assert(c >= 0 && c < 19); 1633 if (c < 16) 1634 lencodes[n++] = cast(ubyte) c; 1635 else if (c == 16) { 1636 c = zreceive(a,2)+3; 1637 memset(lencodes.ptr+n, lencodes[n-1], c); 1638 n += c; 1639 } else if (c == 17) { 1640 c = zreceive(a,3)+3; 1641 memset(lencodes.ptr+n, 0, c); 1642 n += c; 1643 } else { 1644 assert(c == 18); 1645 c = zreceive(a,7)+11; 1646 memset(lencodes.ptr+n, 0, c); 1647 n += c; 1648 } 1649 } 1650 if (n != hlit+hdist) throw new STBImageException("Bad codelengths, corrupt PNG"); 1651 if (!zbuild_huffman(&a.z_length, lencodes.ptr, hlit)) return 0; 1652 if (!zbuild_huffman(&a.z_distance, lencodes.ptr+hlit, hdist)) return 0; 1653 return 1; 1654 } 1655 1656 int parse_uncompressed_block(zbuf *a) 1657 { 1658 ubyte[4] header; 1659 int len,nlen,k; 1660 if (a.num_bits & 7) 1661 zreceive(a, a.num_bits & 7); // discard 1662 // drain the bit-packed data into header 1663 k = 0; 1664 while (a.num_bits > 0) { 1665 header[k++] = cast(ubyte) (a.code_buffer & 255); // wtf this warns? 1666 a.code_buffer >>= 8; 1667 a.num_bits -= 8; 1668 } 1669 assert(a.num_bits == 0); 1670 // now fill header the normal way 1671 while (k < 4) 1672 header[k++] = cast(ubyte) zget8(a); 1673 len = header[1] * 256 + header[0]; 1674 nlen = header[3] * 256 + header[2]; 1675 if (nlen != (len ^ 0xffff)) throw new STBImageException("Zlib corrupt, corrupt PNG"); 1676 if (a.zbuffer + len > a.zbuffer_end) throw new STBImageException("Read past buffer, corrupt PNG"); 1677 if (a.zout + len > a.zout_end) 1678 if (!expand(a, len)) return 0; 1679 memcpy(a.zout, a.zbuffer, len); 1680 a.zbuffer += len; 1681 a.zout += len; 1682 return 1; 1683 } 1684 1685 int parse_zlib_header(zbuf *a) 1686 { 1687 int cmf = zget8(a); 1688 int cm = cmf & 15; 1689 /* int cinfo = cmf >> 4; */ 1690 int flg = zget8(a); 1691 if ((cmf*256+flg) % 31 != 0) throw new STBImageException("Bad zlib header, corrupt PNG"); // zlib spec 1692 if (flg & 32) throw new STBImageException("No preset dict, corrupt PNG"); // preset dictionary not allowed in png 1693 if (cm != 8) throw new STBImageException("Bad compression, corrupt PNG"); // DEFLATE required for png 1694 // window = 1 << (8 + cinfo)... but who cares, we fully buffer output 1695 return 1; 1696 } 1697 1698 // @TODO: should statically initialize these for optimal thread safety 1699 __gshared ubyte[288] default_length; 1700 __gshared ubyte[32] default_distance; 1701 1702 void init_defaults() 1703 { 1704 int i; // use <= to match clearly with spec 1705 for (i=0; i <= 143; ++i) default_length[i] = 8; 1706 for ( ; i <= 255; ++i) default_length[i] = 9; 1707 for ( ; i <= 279; ++i) default_length[i] = 7; 1708 for ( ; i <= 287; ++i) default_length[i] = 8; 1709 1710 for (i=0; i <= 31; ++i) default_distance[i] = 5; 1711 } 1712 1713 __gshared int stbi_png_partial; // a quick hack to only allow decoding some of a PNG... I should implement real streaming support instead 1714 int parse_zlib(zbuf *a, int parse_header) 1715 { 1716 int final_, type; 1717 if (parse_header) 1718 if (!parse_zlib_header(a)) return 0; 1719 a.num_bits = 0; 1720 a.code_buffer = 0; 1721 do { 1722 final_ = zreceive(a,1); 1723 type = zreceive(a,2); 1724 if (type == 0) { 1725 if (!parse_uncompressed_block(a)) return 0; 1726 } else if (type == 3) { 1727 return 0; 1728 } else { 1729 if (type == 1) { 1730 // use fixed code lengths 1731 if (!default_distance[31]) init_defaults(); 1732 if (!zbuild_huffman(&a.z_length , default_length.ptr , 288)) return 0; 1733 if (!zbuild_huffman(&a.z_distance, default_distance.ptr, 32)) return 0; 1734 } else { 1735 if (!compute_huffman_codes(a)) return 0; 1736 } 1737 if (!parse_huffman_block(a)) return 0; 1738 } 1739 if (stbi_png_partial && a.zout - a.zout_start > 65536) 1740 break; 1741 } while (!final_); 1742 return 1; 1743 } 1744 1745 int do_zlib(zbuf *a, ubyte *obuf, int olen, int exp, int parse_header) 1746 { 1747 a.zout_start = obuf; 1748 a.zout = obuf; 1749 a.zout_end = obuf + olen; 1750 a.z_expandable = exp; 1751 1752 return parse_zlib(a, parse_header); 1753 } 1754 1755 ubyte *stbi_zlib_decode_malloc_guesssize(const(ubyte) *buffer, int len, int initial_size, int *outlen) 1756 { 1757 zbuf a; 1758 ubyte *p = cast(ubyte*) malloc(initial_size); 1759 if (p == null) return null; 1760 a.zbuffer = buffer; 1761 a.zbuffer_end = buffer + len; 1762 if (do_zlib(&a, p, initial_size, 1, 1)) { 1763 if (outlen) *outlen = cast(int) (a.zout - a.zout_start); 1764 return a.zout_start; 1765 } else { 1766 free(a.zout_start); 1767 return null; 1768 } 1769 } 1770 1771 ubyte *stbi_zlib_decode_malloc(const(ubyte) *buffer, int len, int *outlen) 1772 { 1773 return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen); 1774 } 1775 1776 ubyte *stbi_zlib_decode_malloc_guesssize_headerflag(const(ubyte) *buffer, int len, int initial_size, int *outlen, int parse_header) 1777 { 1778 zbuf a; 1779 ubyte *p = cast(ubyte*) malloc(initial_size); 1780 if (p == null) return null; 1781 a.zbuffer = buffer; 1782 a.zbuffer_end = buffer + len; 1783 if (do_zlib(&a, p, initial_size, 1, parse_header)) { 1784 if (outlen) *outlen = cast(int) (a.zout - a.zout_start); 1785 return a.zout_start; 1786 } else { 1787 free(a.zout_start); 1788 return null; 1789 } 1790 } 1791 1792 int stbi_zlib_decode_buffer(ubyte* obuffer, int olen, const(ubyte)* ibuffer, int ilen) 1793 { 1794 zbuf a; 1795 a.zbuffer = ibuffer; 1796 a.zbuffer_end = ibuffer + ilen; 1797 if (do_zlib(&a, obuffer, olen, 0, 1)) 1798 return cast(int) (a.zout - a.zout_start); 1799 else 1800 return -1; 1801 } 1802 1803 ubyte *stbi_zlib_decode_noheader_malloc(const(ubyte) *buffer, int len, int *outlen) 1804 { 1805 zbuf a; 1806 ubyte *p = cast(ubyte*) malloc(16384); 1807 if (p == null) return null; 1808 a.zbuffer = buffer; 1809 a.zbuffer_end = buffer+len; 1810 if (do_zlib(&a, p, 16384, 1, 0)) { 1811 if (outlen) *outlen = cast(int) (a.zout - a.zout_start); 1812 return a.zout_start; 1813 } else { 1814 free(a.zout_start); 1815 return null; 1816 } 1817 } 1818 1819 int stbi_zlib_decode_noheader_buffer(ubyte *obuffer, int olen, const(ubyte) *ibuffer, int ilen) 1820 { 1821 zbuf a; 1822 a.zbuffer = ibuffer; 1823 a.zbuffer_end = ibuffer + ilen; 1824 if (do_zlib(&a, obuffer, olen, 0, 0)) 1825 return cast(int) (a.zout - a.zout_start); 1826 else 1827 return -1; 1828 } 1829 1830 // public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18 1831 // simple implementation 1832 // - only 8-bit samples 1833 // - no CRC checking 1834 // - allocates lots of intermediate memory 1835 // - avoids problem of streaming data between subsystems 1836 // - avoids explicit window management 1837 // performance 1838 // - uses stb_zlib, a PD zlib implementation with fast huffman decoding 1839 1840 1841 struct chunk 1842 { 1843 uint length; 1844 uint type; 1845 } 1846 1847 uint PNG_TYPE(ubyte a, ubyte b, ubyte c, ubyte d) 1848 { 1849 return (a << 24) + (b << 16) + (c << 8) + d; 1850 } 1851 1852 chunk get_chunk_header(stbi *s) 1853 { 1854 chunk c; 1855 c.length = get32(s); 1856 c.type = get32(s); 1857 return c; 1858 } 1859 1860 static int check_png_header(stbi *s) 1861 { 1862 static immutable ubyte[8] png_sig = [ 137, 80, 78, 71, 13, 10, 26, 10 ]; 1863 for (int i = 0; i < 8; ++i) 1864 { 1865 ubyte headerByte = get8u(s); 1866 ubyte expected = png_sig[i]; 1867 if (headerByte != expected) 1868 throw new STBImageException("Bad PNG sig, not a PNG"); 1869 } 1870 return 1; 1871 } 1872 1873 struct png 1874 { 1875 stbi *s; 1876 ubyte *idata; 1877 ubyte *expanded; 1878 ubyte *out_; 1879 } 1880 1881 1882 enum : int 1883 { 1884 F_none=0, F_sub=1, F_up=2, F_avg=3, F_paeth=4, 1885 F_avg_first, F_paeth_first 1886 } 1887 1888 static immutable ubyte[5] first_row_filter = 1889 [ 1890 F_none, F_sub, F_none, F_avg_first, F_paeth_first 1891 ]; 1892 1893 static int paeth(int a, int b, int c) 1894 { 1895 int p = a + b - c; 1896 int pa = abs(p-a); 1897 int pb = abs(p-b); 1898 int pc = abs(p-c); 1899 if (pa <= pb && pa <= pc) return a; 1900 if (pb <= pc) return b; 1901 return c; 1902 } 1903 1904 // create the png data from post-deflated data 1905 static int create_png_image_raw(png *a, ubyte *raw, uint raw_len, int out_n, uint x, uint y) 1906 { 1907 stbi *s = a.s; 1908 uint i,j,stride = x*out_n; 1909 int k; 1910 int img_n = s.img_n; // copy it into a local for later 1911 assert(out_n == s.img_n || out_n == s.img_n+1); 1912 if (stbi_png_partial) y = 1; 1913 a.out_ = cast(ubyte*) malloc(x * y * out_n); 1914 if (!a.out_) throw new STBImageException("Out of memory"); 1915 if (!stbi_png_partial) { 1916 if (s.img_x == x && s.img_y == y) { 1917 if (raw_len != (img_n * x + 1) * y) throw new STBImageException("Not enough pixels, corrupt PNG"); 1918 } else { // interlaced: 1919 if (raw_len < (img_n * x + 1) * y) throw new STBImageException("Not enough pixels, corrupt PNG"); 1920 } 1921 } 1922 for (j=0; j < y; ++j) { 1923 ubyte *cur = a.out_ + stride*j; 1924 ubyte *prior = cur - stride; 1925 int filter = *raw++; 1926 if (filter > 4) throw new STBImageException("Invalid filter, corrupt PNG"); 1927 // if first row, use special filter that doesn't sample previous row 1928 if (j == 0) filter = first_row_filter[filter]; 1929 // handle first pixel explicitly 1930 for (k=0; k < img_n; ++k) { 1931 switch (filter) { 1932 case F_none : cur[k] = raw[k]; break; 1933 case F_sub : cur[k] = raw[k]; break; 1934 case F_up : cur[k] = cast(ubyte)(raw[k] + prior[k]); break; 1935 case F_avg : cur[k] = cast(ubyte)(raw[k] + (prior[k]>>1)); break; 1936 case F_paeth : cur[k] = cast(ubyte) (raw[k] + paeth(0,prior[k],0)); break; 1937 case F_avg_first : cur[k] = raw[k]; break; 1938 case F_paeth_first: cur[k] = raw[k]; break; 1939 default: break; 1940 } 1941 } 1942 if (img_n != out_n) cur[img_n] = 255; 1943 raw += img_n; 1944 cur += out_n; 1945 prior += out_n; 1946 // this is a little gross, so that we don't switch per-pixel or per-component 1947 if (img_n == out_n) { 1948 1949 for (i=x-1; i >= 1; --i, raw+=img_n,cur+=img_n,prior+=img_n) 1950 for (k=0; k < img_n; ++k) 1951 { 1952 switch (filter) { 1953 case F_none: cur[k] = raw[k]; break; 1954 case F_sub: cur[k] = cast(ubyte)(raw[k] + cur[k-img_n]); break; 1955 case F_up: cur[k] = cast(ubyte)(raw[k] + prior[k]); break; 1956 case F_avg: cur[k] = cast(ubyte)(raw[k] + ((prior[k] + cur[k-img_n])>>1)); break; 1957 case F_paeth: cur[k] = cast(ubyte) (raw[k] + paeth(cur[k-img_n],prior[k],prior[k-img_n])); break; 1958 case F_avg_first: cur[k] = cast(ubyte)(raw[k] + (cur[k-img_n] >> 1)); break; 1959 case F_paeth_first: cur[k] = cast(ubyte) (raw[k] + paeth(cur[k-img_n],0,0)); break; 1960 default: break; 1961 } 1962 } 1963 } else { 1964 assert(img_n+1 == out_n); 1965 1966 for (i=x-1; i >= 1; --i, cur[img_n]=255,raw+=img_n,cur+=out_n,prior+=out_n) 1967 for (k=0; k < img_n; ++k) 1968 { 1969 switch (filter) { 1970 case F_none: cur[k] = raw[k]; break; 1971 case F_sub: cur[k] = cast(ubyte)(raw[k] + cur[k-out_n]); break; 1972 case F_up: cur[k] = cast(ubyte)(raw[k] + prior[k]); break; 1973 case F_avg: cur[k] = cast(ubyte)(raw[k] + ((prior[k] + cur[k-out_n])>>1)); break; 1974 case F_paeth: cur[k] = cast(ubyte) (raw[k] + paeth(cur[k-out_n],prior[k],prior[k-out_n])); break; 1975 case F_avg_first: cur[k] = cast(ubyte)(raw[k] + (cur[k-out_n] >> 1)); break; 1976 case F_paeth_first: cur[k] = cast(ubyte) (raw[k] + paeth(cur[k-out_n],0,0)); break; 1977 default: break; 1978 } 1979 } 1980 } 1981 } 1982 return 1; 1983 } 1984 1985 int create_png_image(png *a, ubyte *raw, uint raw_len, int out_n, int interlaced) 1986 { 1987 ubyte *final_; 1988 int p; 1989 int save; 1990 if (!interlaced) 1991 return create_png_image_raw(a, raw, raw_len, out_n, a.s.img_x, a.s.img_y); 1992 save = stbi_png_partial; 1993 stbi_png_partial = 0; 1994 1995 // de-interlacing 1996 final_ = cast(ubyte*) malloc(a.s.img_x * a.s.img_y * out_n); 1997 for (p=0; p < 7; ++p) { 1998 static immutable int[7] xorig = [ 0,4,0,2,0,1,0 ]; 1999 static immutable int[7] yorig = [ 0,0,4,0,2,0,1 ]; 2000 static immutable int[7] xspc = [ 8,8,4,4,2,2,1 ]; 2001 static immutable int[7] yspc = [ 8,8,8,4,4,2,2 ]; 2002 int i,j,x,y; 2003 // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1 2004 x = (a.s.img_x - xorig[p] + xspc[p]-1) / xspc[p]; 2005 y = (a.s.img_y - yorig[p] + yspc[p]-1) / yspc[p]; 2006 if (x && y) { 2007 if (!create_png_image_raw(a, raw, raw_len, out_n, x, y)) { 2008 free(final_); 2009 return 0; 2010 } 2011 for (j=0; j < y; ++j) 2012 for (i=0; i < x; ++i) 2013 memcpy(final_ + (j*yspc[p]+yorig[p])*a.s.img_x*out_n + (i*xspc[p]+xorig[p])*out_n, 2014 a.out_ + (j*x+i)*out_n, out_n); 2015 free(a.out_); 2016 raw += (x*out_n+1)*y; 2017 raw_len -= (x*out_n+1)*y; 2018 } 2019 } 2020 a.out_ = final_; 2021 2022 stbi_png_partial = save; 2023 return 1; 2024 } 2025 2026 static int compute_transparency(png *z, ubyte[3] tc, int out_n) 2027 { 2028 stbi *s = z.s; 2029 uint i, pixel_count = s.img_x * s.img_y; 2030 ubyte *p = z.out_; 2031 2032 // compute color-based transparency, assuming we've 2033 // already got 255 as the alpha value in the output 2034 assert(out_n == 2 || out_n == 4); 2035 2036 if (out_n == 2) { 2037 for (i=0; i < pixel_count; ++i) { 2038 p[1] = (p[0] == tc[0] ? 0 : 255); 2039 p += 2; 2040 } 2041 } else { 2042 for (i=0; i < pixel_count; ++i) { 2043 if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) 2044 p[3] = 0; 2045 p += 4; 2046 } 2047 } 2048 return 1; 2049 } 2050 2051 int expand_palette(png *a, ubyte *palette, int len, int pal_img_n) 2052 { 2053 uint i, pixel_count = a.s.img_x * a.s.img_y; 2054 ubyte *p; 2055 ubyte *temp_out; 2056 ubyte *orig = a.out_; 2057 2058 p = cast(ubyte*) malloc(pixel_count * pal_img_n); 2059 if (p == null) 2060 throw new STBImageException("Out of memory"); 2061 2062 // between here and free(out) below, exitting would leak 2063 temp_out = p; 2064 2065 if (pal_img_n == 3) { 2066 for (i=0; i < pixel_count; ++i) { 2067 int n = orig[i]*4; 2068 p[0] = palette[n ]; 2069 p[1] = palette[n+1]; 2070 p[2] = palette[n+2]; 2071 p += 3; 2072 } 2073 } else { 2074 for (i=0; i < pixel_count; ++i) { 2075 int n = orig[i]*4; 2076 p[0] = palette[n ]; 2077 p[1] = palette[n+1]; 2078 p[2] = palette[n+2]; 2079 p[3] = palette[n+3]; 2080 p += 4; 2081 } 2082 } 2083 free(a.out_); 2084 a.out_ = temp_out; 2085 2086 return 1; 2087 } 2088 2089 int parse_png_file(png *z, int scan, int req_comp) 2090 { 2091 ubyte[1024] palette; 2092 ubyte pal_img_n=0; 2093 ubyte has_trans=0; 2094 ubyte[3] tc; 2095 uint ioff=0, idata_limit=0, i, pal_len=0; 2096 int first=1,k,interlace=0; 2097 stbi *s = z.s; 2098 2099 z.expanded = null; 2100 z.idata = null; 2101 z.out_ = null; 2102 2103 if (!check_png_header(s)) return 0; 2104 2105 if (scan == SCAN_type) return 1; 2106 2107 for (;;) { 2108 chunk c = get_chunk_header(s); 2109 switch (c.type) { 2110 case PNG_TYPE('I','H','D','R'): { 2111 int depth,color,comp,filter; 2112 if (!first) throw new STBImageException("Multiple IHDR, corrupt PNG"); 2113 first = 0; 2114 if (c.length != 13) throw new STBImageException("Bad IHDR len, corrupt PNG"); 2115 s.img_x = get32(s); if (s.img_x > (1 << 24)) throw new STBImageException("Very large image (corrupt?)"); 2116 s.img_y = get32(s); if (s.img_y > (1 << 24)) throw new STBImageException("Very large image (corrupt?)"); 2117 depth = get8(s); if (depth != 8) throw new STBImageException("8bit only, PNG not supported: 8-bit only"); 2118 color = get8(s); if (color > 6) throw new STBImageException("Bad ctype, corrupt PNG"); 2119 if (color == 3) pal_img_n = 3; else if (color & 1) throw new STBImageException("Bad ctype, corrupt PNG"); 2120 comp = get8(s); if (comp) throw new STBImageException("Bad comp method, corrupt PNG"); 2121 filter= get8(s); if (filter) throw new STBImageException("Bad filter method, corrupt PNG"); 2122 interlace = get8(s); if (interlace>1) throw new STBImageException("Bad interlace method, corrupt PNG"); 2123 if (!s.img_x || !s.img_y) throw new STBImageException("0-pixel image, corrupt PNG"); 2124 if (!pal_img_n) { 2125 s.img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); 2126 if ((1 << 30) / s.img_x / s.img_n < s.img_y) throw new STBImageException("Image too large to decode"); 2127 if (scan == SCAN_header) return 1; 2128 } else { 2129 // if paletted, then pal_n is our final components, and 2130 // img_n is # components to decompress/filter. 2131 s.img_n = 1; 2132 if ((1 << 30) / s.img_x / 4 < s.img_y) throw new STBImageException("Too large, corrupt PNG"); 2133 // if SCAN_header, have to scan to see if we have a tRNS 2134 } 2135 break; 2136 } 2137 2138 case PNG_TYPE('P','L','T','E'): { 2139 if (first) throw new STBImageException("first not IHDR, corrupt PNG"); 2140 if (c.length > 256*3) throw new STBImageException("invalid PLTE, corrupt PNG"); 2141 pal_len = c.length / 3; 2142 if (pal_len * 3 != c.length) throw new STBImageException("invalid PLTE, corrupt PNG"); 2143 for (i=0; i < pal_len; ++i) { 2144 palette[i*4+0] = get8u(s); 2145 palette[i*4+1] = get8u(s); 2146 palette[i*4+2] = get8u(s); 2147 palette[i*4+3] = 255; 2148 } 2149 break; 2150 } 2151 2152 case PNG_TYPE('t','R','N','S'): { 2153 if (first) throw new STBImageException("first not IHDR, cCorrupt PNG"); 2154 if (z.idata) throw new STBImageException("tRNS after IDAT, corrupt PNG"); 2155 if (pal_img_n) { 2156 if (scan == SCAN_header) { s.img_n = 4; return 1; } 2157 if (pal_len == 0) throw new STBImageException("tRNS before PLTE, corrupt PNG"); 2158 if (c.length > pal_len) throw new STBImageException("bad tRNS len, corrupt PNG"); 2159 pal_img_n = 4; 2160 for (i=0; i < c.length; ++i) 2161 palette[i*4+3] = get8u(s); 2162 } else { 2163 if (!(s.img_n & 1)) throw new STBImageException("tRNS with alpha, corrupt PNG"); 2164 if (c.length != cast(uint) s.img_n*2) throw new STBImageException("bad tRNS len, corrupt PNG"); 2165 has_trans = 1; 2166 for (k=0; k < s.img_n; ++k) 2167 tc[k] = cast(ubyte) get16(s); // non 8-bit images will be larger 2168 } 2169 break; 2170 } 2171 2172 case PNG_TYPE('I','D','A','T'): { 2173 if (first) throw new STBImageException("first not IHDR, corrupt PNG"); 2174 if (pal_img_n && !pal_len) throw new STBImageException("no PLTE, corrupt PNG"); 2175 if (scan == SCAN_header) { s.img_n = pal_img_n; return 1; } 2176 if (ioff + c.length > idata_limit) { 2177 ubyte *p; 2178 if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096; 2179 while (ioff + c.length > idata_limit) 2180 idata_limit *= 2; 2181 p = cast(ubyte*) realloc(z.idata, idata_limit); if (p == null) throw new STBImageException("outofmem, cOut of memory"); 2182 z.idata = p; 2183 } 2184 if (!getn(s, z.idata+ioff,c.length)) throw new STBImageException("outofdata, corrupt PNG"); 2185 ioff += c.length; 2186 break; 2187 } 2188 2189 case PNG_TYPE('I','E','N','D'): { 2190 uint raw_len; 2191 if (first) throw new STBImageException("first not IHDR, corrupt PNG"); 2192 if (scan != SCAN_load) return 1; 2193 if (z.idata == null) throw new STBImageException("no IDAT, corrupt PNG"); 2194 z.expanded = stbi_zlib_decode_malloc_guesssize_headerflag(z.idata, ioff, 16384, cast(int *) &raw_len, 1); 2195 if (z.expanded == null) return 0; // zlib should set error 2196 free(z.idata); z.idata = null; 2197 if ((req_comp == s.img_n+1 && req_comp != 3 && !pal_img_n) || has_trans) 2198 s.img_out_n = s.img_n+1; 2199 else 2200 s.img_out_n = s.img_n; 2201 if (!create_png_image(z, z.expanded, raw_len, s.img_out_n, interlace)) return 0; 2202 if (has_trans) 2203 if (!compute_transparency(z, tc, s.img_out_n)) return 0; 2204 if (pal_img_n) { 2205 // pal_img_n == 3 or 4 2206 s.img_n = pal_img_n; // record the actual colors we had 2207 s.img_out_n = pal_img_n; 2208 if (req_comp >= 3) s.img_out_n = req_comp; 2209 if (!expand_palette(z, palette.ptr, pal_len, s.img_out_n)) 2210 return 0; 2211 } 2212 free(z.expanded); z.expanded = null; 2213 return 1; 2214 } 2215 2216 default: 2217 // if critical, fail 2218 if (first) throw new STBImageException("first not IHDR, corrupt PNG"); 2219 if ((c.type & (1 << 29)) == 0) { 2220 2221 throw new STBImageException("PNG not supported: unknown chunk type"); 2222 } 2223 skip(s, c.length); 2224 break; 2225 } 2226 // end of chunk, read and skip CRC 2227 get32(s); 2228 } 2229 } 2230 2231 ubyte *do_png(png *p, int *x, int *y, int *n, int req_comp) 2232 { 2233 ubyte *result=null; 2234 if (req_comp < 0 || req_comp > 4) 2235 throw new STBImageException("Internal error: bad req_comp"); 2236 if (parse_png_file(p, SCAN_load, req_comp)) { 2237 result = p.out_; 2238 p.out_ = null; 2239 if (req_comp && req_comp != p.s.img_out_n) { 2240 result = convert_format(result, p.s.img_out_n, req_comp, p.s.img_x, p.s.img_y); 2241 p.s.img_out_n = req_comp; 2242 if (result == null) return result; 2243 } 2244 *x = p.s.img_x; 2245 *y = p.s.img_y; 2246 if (n) *n = p.s.img_n; 2247 } 2248 free(p.out_); p.out_ = null; 2249 free(p.expanded); p.expanded = null; 2250 free(p.idata); p.idata = null; 2251 2252 return result; 2253 } 2254 2255 ubyte *stbi_png_load(stbi *s, int *x, int *y, int *comp, int req_comp) 2256 { 2257 png p; 2258 p.s = s; 2259 return do_png(&p, x,y,comp,req_comp); 2260 } 2261 2262 void stbi_png_test(stbi *s) 2263 { 2264 int r = check_png_header(s); 2265 if (r == 0) 2266 throw new STBImageException("Couldn't decode PNG header"); 2267 } 2268 2269 // Microsoft/Windows BMP image 2270 2271 void stbi_bmp_test(stbi *s) 2272 { 2273 if (get8(s) != 'B') throw new STBImageException("Couldn't decode BMP header"); 2274 if (get8(s) != 'M') throw new STBImageException("Couldn't decode BMP header"); 2275 get32le(s); // discard filesize 2276 get16le(s); // discard reserved 2277 get16le(s); // discard reserved 2278 get32le(s); // discard data offset 2279 int sz = get32le(s); 2280 if (sz == 12 || sz == 40 || sz == 56 || sz == 108) 2281 return; 2282 2283 throw new STBImageException("Couldn't decode BMP header"); 2284 } 2285 2286 2287 // returns 0..31 for the highest set bit 2288 int high_bit(uint z) 2289 { 2290 int n=0; 2291 if (z == 0) return -1; 2292 if (z >= 0x10000) n += 16, z >>= 16; 2293 if (z >= 0x00100) n += 8, z >>= 8; 2294 if (z >= 0x00010) n += 4, z >>= 4; 2295 if (z >= 0x00004) n += 2, z >>= 2; 2296 if (z >= 0x00002) n += 1, z >>= 1; 2297 return n; 2298 } 2299 2300 int bitcount(uint a) 2301 { 2302 a = (a & 0x55555555) + ((a >> 1) & 0x55555555); // max 2 2303 a = (a & 0x33333333) + ((a >> 2) & 0x33333333); // max 4 2304 a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits 2305 a = (a + (a >> 8)); // max 16 per 8 bits 2306 a = (a + (a >> 16)); // max 32 per 8 bits 2307 return a & 0xff; 2308 } 2309 2310 int shiftsigned(int v, int shift, int bits) 2311 { 2312 int result; 2313 int z=0; 2314 2315 if (shift < 0) v <<= -shift; 2316 else v >>= shift; 2317 result = v; 2318 2319 z = bits; 2320 while (z < 8) { 2321 result += v >> z; 2322 z += bits; 2323 } 2324 return result; 2325 } 2326 2327 ubyte *bmp_load(stbi *s, int *x, int *y, int *comp, int req_comp) 2328 { 2329 ubyte *out_; 2330 uint mr=0,mg=0,mb=0,ma=0, fake_a=0; 2331 ubyte[4][256] pal; 2332 int psize=0,i,j,compress=0,width; 2333 int bpp, flip_vertically, pad, target, offset, hsz; 2334 if (get8(s) != 'B' || get8(s) != 'M') throw new STBImageException("not BMP, Corrupt BMP"); 2335 get32le(s); // discard filesize 2336 get16le(s); // discard reserved 2337 get16le(s); // discard reserved 2338 offset = get32le(s); 2339 hsz = get32le(s); 2340 if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108) throw new STBImageException("unknown BMP, BMP type not supported: unknown"); 2341 if (hsz == 12) { 2342 s.img_x = get16le(s); 2343 s.img_y = get16le(s); 2344 } else { 2345 s.img_x = get32le(s); 2346 s.img_y = get32le(s); 2347 } 2348 if (get16le(s) != 1) throw new STBImageException("bad BMP"); 2349 bpp = get16le(s); 2350 if (bpp == 1) throw new STBImageException("monochrome, BMP type not supported: 1-bit"); 2351 flip_vertically = (cast(int) s.img_y) > 0; 2352 s.img_y = abs(cast(int) s.img_y); 2353 if (hsz == 12) { 2354 if (bpp < 24) 2355 psize = (offset - 14 - 24) / 3; 2356 } else { 2357 compress = get32le(s); 2358 if (compress == 1 || compress == 2) throw new STBImageException("BMP RLE, BMP type not supported: RLE"); 2359 get32le(s); // discard sizeof 2360 get32le(s); // discard hres 2361 get32le(s); // discard vres 2362 get32le(s); // discard colorsused 2363 get32le(s); // discard max important 2364 if (hsz == 40 || hsz == 56) { 2365 if (hsz == 56) { 2366 get32le(s); 2367 get32le(s); 2368 get32le(s); 2369 get32le(s); 2370 } 2371 if (bpp == 16 || bpp == 32) { 2372 mr = mg = mb = 0; 2373 if (compress == 0) { 2374 if (bpp == 32) { 2375 mr = 0xffu << 16; 2376 mg = 0xffu << 8; 2377 mb = 0xffu << 0; 2378 ma = 0xffu << 24; 2379 fake_a = 1; // @TODO: check for cases like alpha value is all 0 and switch it to 255 2380 } else { 2381 mr = 31u << 10; 2382 mg = 31u << 5; 2383 mb = 31u << 0; 2384 } 2385 } else if (compress == 3) { 2386 mr = get32le(s); 2387 mg = get32le(s); 2388 mb = get32le(s); 2389 // not documented, but generated by photoshop and handled by mspaint 2390 if (mr == mg && mg == mb) { 2391 // ?!?!? 2392 throw new STBImageException("bad BMP"); 2393 } 2394 } else 2395 throw new STBImageException("bad BMP"); 2396 } 2397 } else { 2398 assert(hsz == 108); 2399 mr = get32le(s); 2400 mg = get32le(s); 2401 mb = get32le(s); 2402 ma = get32le(s); 2403 get32le(s); // discard color space 2404 for (i=0; i < 12; ++i) 2405 get32le(s); // discard color space parameters 2406 } 2407 if (bpp < 16) 2408 psize = (offset - 14 - hsz) >> 2; 2409 } 2410 s.img_n = ma ? 4 : 3; 2411 if (req_comp && req_comp >= 3) // we can directly decode 3 or 4 2412 target = req_comp; 2413 else 2414 target = s.img_n; // if they want monochrome, we'll post-convert 2415 out_ = cast(ubyte*) malloc(target * s.img_x * s.img_y); 2416 if (!out_) throw new STBImageException("Out of memory"); 2417 if (bpp < 16) { 2418 int z=0; 2419 if (psize == 0 || psize > 256) { free(out_); throw new STBImageException("invalid, Corrupt BMP"); } 2420 for (i=0; i < psize; ++i) { 2421 pal[i][2] = get8u(s); 2422 pal[i][1] = get8u(s); 2423 pal[i][0] = get8u(s); 2424 if (hsz != 12) get8(s); 2425 pal[i][3] = 255; 2426 } 2427 skip(s, offset - 14 - hsz - psize * (hsz == 12 ? 3 : 4)); 2428 if (bpp == 4) width = (s.img_x + 1) >> 1; 2429 else if (bpp == 8) width = s.img_x; 2430 else { free(out_); throw new STBImageException("bad bpp, corrupt BMP"); } 2431 pad = (-width)&3; 2432 for (j=0; j < cast(int) s.img_y; ++j) { 2433 for (i=0; i < cast(int) s.img_x; i += 2) { 2434 int v=get8(s),v2=0; 2435 if (bpp == 4) { 2436 v2 = v & 15; 2437 v >>= 4; 2438 } 2439 out_[z++] = pal[v][0]; 2440 out_[z++] = pal[v][1]; 2441 out_[z++] = pal[v][2]; 2442 if (target == 4) out_[z++] = 255; 2443 if (i+1 == cast(int) s.img_x) break; 2444 v = (bpp == 8) ? get8(s) : v2; 2445 out_[z++] = pal[v][0]; 2446 out_[z++] = pal[v][1]; 2447 out_[z++] = pal[v][2]; 2448 if (target == 4) out_[z++] = 255; 2449 } 2450 skip(s, pad); 2451 } 2452 } else { 2453 int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0; 2454 int z = 0; 2455 int easy=0; 2456 skip(s, offset - 14 - hsz); 2457 if (bpp == 24) width = 3 * s.img_x; 2458 else if (bpp == 16) width = 2*s.img_x; 2459 else /* bpp = 32 and pad = 0 */ width=0; 2460 pad = (-width) & 3; 2461 if (bpp == 24) { 2462 easy = 1; 2463 } else if (bpp == 32) { 2464 if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000) 2465 easy = 2; 2466 } 2467 if (!easy) { 2468 if (!mr || !mg || !mb) { free(out_); throw new STBImageException("bad masks, corrupt BMP"); } 2469 // right shift amt to put high bit in position #7 2470 rshift = high_bit(mr)-7; rcount = bitcount(mr); 2471 gshift = high_bit(mg)-7; gcount = bitcount(mr); 2472 bshift = high_bit(mb)-7; bcount = bitcount(mr); 2473 ashift = high_bit(ma)-7; acount = bitcount(mr); 2474 } 2475 for (j=0; j < cast(int) s.img_y; ++j) { 2476 if (easy) { 2477 for (i=0; i < cast(int) s.img_x; ++i) { 2478 int a; 2479 out_[z+2] = get8u(s); 2480 out_[z+1] = get8u(s); 2481 out_[z+0] = get8u(s); 2482 z += 3; 2483 a = (easy == 2 ? get8(s) : 255); 2484 if (target == 4) out_[z++] = cast(ubyte) a; 2485 } 2486 } else { 2487 for (i=0; i < cast(int) s.img_x; ++i) { 2488 uint v = (bpp == 16 ? get16le(s) : get32le(s)); 2489 int a; 2490 out_[z++] = cast(ubyte) shiftsigned(v & mr, rshift, rcount); 2491 out_[z++] = cast(ubyte) shiftsigned(v & mg, gshift, gcount); 2492 out_[z++] = cast(ubyte) shiftsigned(v & mb, bshift, bcount); 2493 a = (ma ? shiftsigned(v & ma, ashift, acount) : 255); 2494 if (target == 4) out_[z++] = cast(ubyte) a; 2495 } 2496 } 2497 skip(s, pad); 2498 } 2499 } 2500 if (flip_vertically) { 2501 ubyte t; 2502 for (j=0; j < cast(int) s.img_y>>1; ++j) { 2503 ubyte *p1 = out_ + j *s.img_x*target; 2504 ubyte *p2 = out_ + (s.img_y-1-j)*s.img_x*target; 2505 for (i=0; i < cast(int) s.img_x*target; ++i) { 2506 t = p1[i], p1[i] = p2[i], p2[i] = t; 2507 } 2508 } 2509 } 2510 2511 if (req_comp && req_comp != target) { 2512 out_ = convert_format(out_, target, req_comp, s.img_x, s.img_y); 2513 if (out_ == null) return out_; // convert_format frees input on failure 2514 } 2515 2516 *x = s.img_x; 2517 *y = s.img_y; 2518 if (comp) *comp = s.img_n; 2519 return out_; 2520 } 2521 2522 ubyte *stbi_bmp_load(stbi *s, int *x, int *y, int *comp, int req_comp) 2523 { 2524 return bmp_load(s, x,y,comp,req_comp); 2525 } 2526 2527 // ************************************************************************************************* 2528 // GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb 2529 struct stbi_gif_lzw 2530 { 2531 short prefix; 2532 ubyte first; 2533 ubyte suffix; 2534 } 2535 2536 struct stbi_gif 2537 { 2538 int w,h; 2539 ubyte *out_; // output buffer (always 4 components) 2540 int flags, bgindex, ratio, transparent, eflags; 2541 ubyte[4][256] pal; 2542 ubyte[4][256] lpal; 2543 stbi_gif_lzw[4096] codes; 2544 ubyte *color_table; 2545 int parse, step; 2546 int lflags; 2547 int start_x, start_y; 2548 int max_x, max_y; 2549 int cur_x, cur_y; 2550 int line_size; 2551 } 2552 2553 void stbi_gif_test(stbi *s) 2554 { 2555 int sz; 2556 if (get8(s) != 'G' || get8(s) != 'I' || get8(s) != 'F' || get8(s) != '8') 2557 throw new STBImageException("Couldn't decode GIF header"); 2558 sz = get8(s); 2559 if (sz != '9' && sz != '7') 2560 throw new STBImageException("Couldn't decode GIF header"); 2561 if (get8(s) != 'a') 2562 throw new STBImageException("Couldn't decode GIF header"); 2563 } 2564 2565 void stbi_gif_parse_colortable(stbi *s, ubyte[4][256] pal, int num_entries, int transp) 2566 { 2567 int i; 2568 for (i=0; i < num_entries; ++i) { 2569 pal[i][2] = get8u(s); 2570 pal[i][1] = get8u(s); 2571 pal[i][0] = get8u(s); 2572 pal[i][3] = transp ? 0 : 255; 2573 } 2574 } 2575 2576 int stbi_gif_header(stbi *s, stbi_gif *g, int *comp, int is_info) 2577 { 2578 ubyte version_; 2579 if (get8(s) != 'G' || get8(s) != 'I' || get8(s) != 'F' || get8(s) != '8') 2580 throw new STBImageException("not GIF, corrupt GIF"); 2581 2582 version_ = get8u(s); 2583 if (version_ != '7' && version_ != '9') throw new STBImageException("not GIF, corrupt GIF"); 2584 if (get8(s) != 'a') throw new STBImageException("not GIF, corrupt GIF"); 2585 2586 g.w = get16le(s); 2587 g.h = get16le(s); 2588 g.flags = get8(s); 2589 g.bgindex = get8(s); 2590 g.ratio = get8(s); 2591 g.transparent = -1; 2592 2593 if (comp != null) *comp = 4; // can't actually tell whether it's 3 or 4 until we parse the comments 2594 2595 if (is_info) return 1; 2596 2597 if (g.flags & 0x80) 2598 stbi_gif_parse_colortable(s,g.pal, 2 << (g.flags & 7), -1); 2599 2600 return 1; 2601 } 2602 2603 void stbi_out_gif_code(stbi_gif *g, ushort code) 2604 { 2605 ubyte *p; 2606 ubyte *c; 2607 2608 // recurse to decode the prefixes, since the linked-list is backwards, 2609 // and working backwards through an interleaved image would be nasty 2610 if (g.codes[code].prefix >= 0) 2611 stbi_out_gif_code(g, g.codes[code].prefix); 2612 2613 if (g.cur_y >= g.max_y) return; 2614 2615 p = (&g.out_[g.cur_x + g.cur_y]); 2616 c = &g.color_table[g.codes[code].suffix * 4]; 2617 2618 if (c[3] >= 128) { 2619 p[0] = c[2]; 2620 p[1] = c[1]; 2621 p[2] = c[0]; 2622 p[3] = c[3]; 2623 } 2624 g.cur_x += 4; 2625 2626 if (g.cur_x >= g.max_x) { 2627 g.cur_x = g.start_x; 2628 g.cur_y += g.step; 2629 2630 while (g.cur_y >= g.max_y && g.parse > 0) { 2631 g.step = (1 << g.parse) * g.line_size; 2632 g.cur_y = g.start_y + (g.step >> 1); 2633 --g.parse; 2634 } 2635 } 2636 } 2637 2638 ubyte *stbi_process_gif_raster(stbi *s, stbi_gif *g) 2639 { 2640 ubyte lzw_cs; 2641 int len, code; 2642 uint first; 2643 int codesize, codemask, avail, oldcode, bits, valid_bits, clear; 2644 stbi_gif_lzw *p; 2645 2646 lzw_cs = get8u(s); 2647 clear = 1 << lzw_cs; 2648 first = 1; 2649 codesize = lzw_cs + 1; 2650 codemask = (1 << codesize) - 1; 2651 bits = 0; 2652 valid_bits = 0; 2653 for (code = 0; code < clear; code++) { 2654 g.codes[code].prefix = -1; 2655 g.codes[code].first = cast(ubyte) code; 2656 g.codes[code].suffix = cast(ubyte) code; 2657 } 2658 2659 // support no starting clear code 2660 avail = clear+2; 2661 oldcode = -1; 2662 2663 len = 0; 2664 for(;;) { 2665 if (valid_bits < codesize) { 2666 if (len == 0) { 2667 len = get8(s); // start new block 2668 if (len == 0) 2669 return g.out_; 2670 } 2671 --len; 2672 bits |= cast(int) get8(s) << valid_bits; 2673 valid_bits += 8; 2674 } else { 2675 int code_ = bits & codemask; 2676 bits >>= codesize; 2677 valid_bits -= codesize; 2678 // @OPTIMIZE: is there some way we can accelerate the non-clear path? 2679 if (code_ == clear) { // clear code 2680 codesize = lzw_cs + 1; 2681 codemask = (1 << codesize) - 1; 2682 avail = clear + 2; 2683 oldcode = -1; 2684 first = 0; 2685 } else if (code_ == clear + 1) { // end of stream code 2686 skip(s, len); 2687 while ((len = get8(s)) > 0) 2688 skip(s,len); 2689 return g.out_; 2690 } else if (code_ <= avail) { 2691 if (first) throw new STBImageException("no clear code, corrupt GIF"); 2692 2693 if (oldcode >= 0) { 2694 p = &g.codes[avail++]; 2695 if (avail > 4096) throw new STBImageException("too many codes, corrupt GIF"); 2696 p.prefix = cast(short) oldcode; 2697 p.first = g.codes[oldcode].first; 2698 p.suffix = (code_ == avail) ? p.first : g.codes[code_].first; 2699 } else if (code_ == avail) 2700 throw new STBImageException("illegal code in raster, corrupt GIF"); 2701 2702 stbi_out_gif_code(g, cast(ushort) code); 2703 2704 if ((avail & codemask) == 0 && avail <= 0x0FFF) { 2705 codesize++; 2706 codemask = (1 << codesize) - 1; 2707 } 2708 2709 oldcode = code_; 2710 } else { 2711 throw new STBImageException("illegal code in raster, corrupt GIF"); 2712 } 2713 } 2714 } 2715 } 2716 2717 void stbi_fill_gif_background(stbi_gif *g) 2718 { 2719 int i; 2720 ubyte *c = g.pal[g.bgindex].ptr; 2721 // @OPTIMIZE: write a dword at a time 2722 for (i = 0; i < g.w * g.h * 4; i += 4) { 2723 ubyte *p = &g.out_[i]; 2724 p[0] = c[2]; 2725 p[1] = c[1]; 2726 p[2] = c[0]; 2727 p[3] = c[3]; 2728 } 2729 } 2730 2731 // this function is designed to support animated gifs, although stb_image doesn't support it 2732 ubyte *stbi_gif_load_next(stbi *s, stbi_gif *g, int *comp, int req_comp) 2733 { 2734 int i; 2735 ubyte *old_out = null; 2736 2737 if (g.out_ == null) { 2738 if (!stbi_gif_header(s, g, comp,0)) return null; // failure_reason set by stbi_gif_header 2739 g.out_ = cast(ubyte*) malloc(4 * g.w * g.h); 2740 if (g.out_ == null) throw new STBImageException("Out of memory"); 2741 stbi_fill_gif_background(g); 2742 } else { 2743 // animated-gif-only path 2744 if (((g.eflags & 0x1C) >> 2) == 3) { 2745 old_out = g.out_; 2746 g.out_ = cast(ubyte*) malloc(4 * g.w * g.h); 2747 if (g.out_ == null) throw new STBImageException("Out of memory"); 2748 memcpy(g.out_, old_out, g.w*g.h*4); 2749 } 2750 } 2751 2752 for (;;) { 2753 switch (get8(s)) { 2754 case 0x2C: /* Image Descriptor */ 2755 { 2756 int x, y, w, h; 2757 ubyte *o; 2758 2759 x = get16le(s); 2760 y = get16le(s); 2761 w = get16le(s); 2762 h = get16le(s); 2763 if (((x + w) > (g.w)) || ((y + h) > (g.h))) 2764 throw new STBImageException("bad Image Descriptor, corrupt GIF"); 2765 2766 g.line_size = g.w * 4; 2767 g.start_x = x * 4; 2768 g.start_y = y * g.line_size; 2769 g.max_x = g.start_x + w * 4; 2770 g.max_y = g.start_y + h * g.line_size; 2771 g.cur_x = g.start_x; 2772 g.cur_y = g.start_y; 2773 2774 g.lflags = get8(s); 2775 2776 if (g.lflags & 0x40) { 2777 g.step = 8 * g.line_size; // first interlaced spacing 2778 g.parse = 3; 2779 } else { 2780 g.step = g.line_size; 2781 g.parse = 0; 2782 } 2783 2784 if (g.lflags & 0x80) { 2785 stbi_gif_parse_colortable(s,g.lpal, 2 << (g.lflags & 7), g.eflags & 0x01 ? g.transparent : -1); 2786 g.color_table = &g.lpal[0][0]; 2787 } else if (g.flags & 0x80) { 2788 for (i=0; i < 256; ++i) // @OPTIMIZE: reset only the previous transparent 2789 g.pal[i][3] = 255; 2790 if (g.transparent >= 0 && (g.eflags & 0x01)) 2791 g.pal[g.transparent][3] = 0; 2792 g.color_table = &g.pal[0][0]; 2793 } else 2794 throw new STBImageException("missing color table, corrupt GIF"); 2795 2796 o = stbi_process_gif_raster(s, g); 2797 if (o == null) return null; 2798 2799 if (req_comp && req_comp != 4) 2800 o = convert_format(o, 4, req_comp, g.w, g.h); 2801 return o; 2802 } 2803 2804 case 0x21: // Comment Extension. 2805 { 2806 int len; 2807 if (get8(s) == 0xF9) { // Graphic Control Extension. 2808 len = get8(s); 2809 if (len == 4) { 2810 g.eflags = get8(s); 2811 get16le(s); // delay 2812 g.transparent = get8(s); 2813 } else { 2814 skip(s, len); 2815 break; 2816 } 2817 } 2818 while ((len = get8(s)) != 0) 2819 skip(s, len); 2820 break; 2821 } 2822 2823 case 0x3B: // gif stream termination code 2824 return cast(ubyte*) 1; 2825 2826 default: 2827 throw new STBImageException("unknown code, corrupt GIF"); 2828 } 2829 } 2830 } 2831 2832 ubyte *stbi_gif_load(stbi *s, int *x, int *y, int *comp, int req_comp) 2833 { 2834 ubyte *u = null; 2835 stbi_gif g={0}; 2836 2837 u = stbi_gif_load_next(s, &g, comp, req_comp); 2838 if (u == cast(void *) 1) u = null; // end of animated gif marker 2839 if (u) { 2840 *x = g.w; 2841 *y = g.h; 2842 } 2843 2844 return u; 2845 } 2846 2847