1 2 /// D translation of stb_image-1.33 (http://nothings.org/stb_image.c) 3 /// 4 /// This port only supports: 5 /// $(UL 6 /// $(LI PNG 8-bit-per-channel only.) 7 /// $(LI JPEG baseline (no JPEG progressive).) 8 /// $(LI GIF.) 9 /// $(LI BMP non-1bpp, non-RLE.) 10 /// ) 11 /// 12 /// TODO: 13 /// $(UL 14 /// $(LI Support a range as input.) 15 /// ) 16 17 //============================ Contributors ========================= 18 // 19 // Image formats Optimizations & bugfixes 20 // Sean Barrett (jpeg, png, bmp) Fabian "ryg" Giesen 21 // Nicolas Schulz (hdr, psd) 22 // Jonathan Dummer (tga) Bug fixes & warning fixes 23 // Jean-Marc Lienher (gif) Marc LeBlanc 24 // Tom Seddon (pic) Christpher Lloyd 25 // Thatcher Ulrich (psd) Dave Moore 26 // Won Chun 27 // the Horde3D community 28 // Extensions, features Janez Zemva 29 // Jetro Lauha (stbi_info) Jonathan Blow 30 // James "moose2000" Brown (iPhone PNG) Laurent Gomila 31 // Ben "Disch" Wenger (io callbacks) Aruelien Pocheville 32 // Martin "SpartanJ" Golini Ryamond Barbiero 33 // David Woo 34 35 module gfm.image.stb_image; 36 37 import core.stdc.stdlib; 38 import core.stdc.string; 39 40 import gfm.math.vector, 41 gfm.image.bitmap; 42 43 enum STBI_VERSION = 1; 44 45 /// The exception type thrown when loading an image failed. 
class STBImageException : Exception
{
    public
    {
        /// Params: msg = description of why the image could not be loaded.
        this(string msg)
        {
            super(msg);
        }
    }
}

/// Channel counts accepted as `req_comp`.
enum : int
{
    STBI_default    = 0, // only used for req_comp
    STBI_grey       = 1,
    STBI_grey_alpha = 2,
    STBI_rgb        = 3,
    STBI_rgb_alpha  = 4
}

// define faster low-level operations (typically SIMD support)

/// Rotates `x` left by `y` bits.
uint stbi_lrot(uint x, uint y)
{
    // Both shift counts are masked so that y == 0 (or 32) never produces a
    // shift by the full word width; for y in 1..31 the result is unchanged.
    return (x << (y & 31)) | (x >> ((32 - y) & 31));
}

// stbi structure is our basic context used by all images, so it
// contains all the IO context, plus some basic image information
struct stbi
{
    uint img_x, img_y;
    int img_n, img_out_n;

    int buflen;
    ubyte[128] buffer_start;

    const(ubyte) *img_buffer;          // read cursor
    const(ubyte) *img_buffer_end;      // one past the last readable byte
    const(ubyte) *img_buffer_original; // start of the buffer, used by stbi_rewind
}


/// Initializes a memory-decode context over `len` bytes starting at `buffer`.
void start_mem(stbi *s, const(ubyte)*buffer, int len)
{
    s.img_buffer = buffer;
    s.img_buffer_original = buffer;
    s.img_buffer_end = buffer+len;
}

/// Moves the read cursor back to the start of the buffer.
void stbi_rewind(stbi *s)
{
    // conceptually rewind SHOULD rewind to the beginning of the stream,
    // but we just rewind to the beginning of the initial buffer, because
    // we only use it after doing 'test', which only ever looks at at most 92 bytes
    s.img_buffer = s.img_buffer_original;
}


/// Tries each supported decoder in turn (JPEG, PNG, BMP, GIF) and returns the
/// pixels produced by the first one that succeeds.
///
/// An STBImageException thrown by a format's test or load step is swallowed:
/// the stream is rewound and the next format is tried.
/// Throws: STBImageException when no decoder succeeds.
ubyte *stbi_load_main(stbi *s, int *x, int *y, int *comp, int req_comp)
{
    try
    {
        stbi_jpeg_test(s);
        stbi_rewind(s);
        return stbi_jpeg_load(s,x,y,comp,req_comp);
    }
    catch(STBImageException e)
    {
        stbi_rewind(s);
    }

    try
    {
        stbi_png_test(s);
        stbi_rewind(s);
        return stbi_png_load(s,x,y,comp,req_comp);
    }
    catch(STBImageException e)
    {
        stbi_rewind(s);
    }

    try
    {
        stbi_bmp_test(s);
        stbi_rewind(s);
        return stbi_bmp_load(s,x,y,comp,req_comp);
    }
    catch(STBImageException e)
    {
        stbi_rewind(s);
    }

    try
    {
        stbi_gif_test(s);
        stbi_rewind(s);
        return stbi_gif_load(s,x,y,comp,req_comp);
    }
    catch(STBImageException e)
    {
        stbi_rewind(s);
    }

    throw new STBImageException("Image not of any known type, or corrupt");
}

/// Loads an image from memory.
/// Throws: STBImageException on error.
ubyte* stbi_load_from_memory(void[] buffer, out int width, out int height, out int components, int requestedComponents)
{
    // The decode context stores the length as an int; refuse buffers that
    // would be silently truncated by the cast.
    if (buffer.length > int.max)
        throw new STBImageException("Image buffer too large");

    stbi s;
    start_mem(&s, cast(ubyte*)buffer.ptr, cast(int)(buffer.length));
    return stbi_load_main(&s, &width, &height, &components, requestedComponents);
}

/// Frees an image loaded by stb_image.
void stbi_image_free(void *retval_from_stbi_load)
{
    free(retval_from_stbi_load);
}

/// Load an image from memory and puts it in a Bitmap.
/// See_also: Bitmap.
/// Throws: STBImageException on error.
Bitmap!vec4ub stbiLoadImage(void[] buffer)
{
    int width, height, components;
    ubyte* data = stbi_load_from_memory(buffer, width, height, components, 4);
    scope(exit) stbi_image_free(data);

    // NOTE(review): this relies on the loaders reporting the *output* channel
    // count in `components`; confirm against the individual *_load functions.
    if(components != 4)
        throw new STBImageException("Could't convert image to 4 components");

    auto result = Bitmap!vec4ub(vec2i(width, height));
    // Four channels were requested, so every pixel occupies vec4ub.sizeof
    // bytes; copy the whole image rather than only width*height bytes.
    memcpy(result.ptr, data, cast(size_t) width * height * vec4ub.sizeof);
    return result;
}

//
// Common code used by all image loaders
//

enum : int
{
    SCAN_load=0,
    SCAN_type,
    SCAN_header
}


/// Reads one byte and advances the cursor; returns 0 once the buffer is exhausted.
int get8(stbi *s)
{
    if (s.img_buffer < s.img_buffer_end)
        return *s.img_buffer++;

    return 0;
}

/// Returns non-zero when the cursor has reached the end of the buffer.
int at_eof(stbi *s)
{
    return s.img_buffer >= s.img_buffer_end;
}

/// Same as get8, with the result narrowed to ubyte.
ubyte get8u(stbi *s)
{
    return cast(ubyte) get8(s);
}

/// Advances the cursor by `n` bytes.
///
/// A negative `n` (for instance from a corrupt length field) or an `n` larger
/// than the remaining data parks the cursor at the end of the buffer, so the
/// cursor never leaves the buffer.
void skip(stbi *s, int n)
{
    immutable ptrdiff_t remaining = s.img_buffer_end - s.img_buffer;
    if (n < 0 || n > remaining)
        s.img_buffer = s.img_buffer_end;
    else
        s.img_buffer += n;
}

/// Copies `n` bytes into `buffer` and advances the cursor.
/// Returns: 1 on success, 0 (nothing copied) when fewer than `n` bytes remain
/// or `n` is negative.
int getn(stbi *s, ubyte *buffer, int n)
{
    // Compare against the remaining length instead of forming cursor+n,
    // which could point outside the buffer.
    if (n < 0 || n > s.img_buffer_end - s.img_buffer)
        return 0;

    memcpy(buffer, s.img_buffer, n);
    s.img_buffer += n;
    return 1;
}
/// Reads a big-endian 16-bit value.
int get16(stbi *s)
{
    int z = get8(s);
    return (z << 8) + get8(s);
}

/// Reads a big-endian 32-bit value.
uint get32(stbi *s)
{
    uint z = get16(s);
    return (z << 16) + get16(s);
}

/// Reads a little-endian 16-bit value.
int get16le(stbi *s)
{
    int z = get8(s);
    return z + (get8(s) << 8);
}

/// Reads a little-endian 32-bit value.
uint get32le(stbi *s)
{
    uint z = get16le(s);
    return z + (get16le(s) << 16);
}

//
// generic converter from built-in img_n to req_comp
// individual types do this automatically as much as possible (e.g. jpeg
// does all cases internally since it needs to colorspace convert anyway,
// and it never has alpha, so very few cases ). png can automatically
// interleave an alpha=255 channel, but falls back to this for other cases
//
// assume data buffer is malloced, so malloc a new one and free that one
// only failure mode is malloc failing

/// Weighted sum of r, g, b (weights 77, 150, 29 out of 256) used as the grey value.
ubyte compute_y(int r, int g, int b)
{
    return cast(ubyte) (((r*77) + (g*150) + (29*b)) >> 8);
}

/// Converts an `x` by `y` image with `img_n` interleaved channels into one
/// with `req_comp` channels.
///
/// Returns `data` itself when no conversion is needed; otherwise returns a
/// newly malloc'ed buffer and frees `data`.
/// Throws: STBImageException when the output size does not fit in size_t or
/// the allocation fails (`data` is freed in both cases).
ubyte *convert_format(ubyte *data, int img_n, int req_comp, uint x, uint y)
{
    if (req_comp == img_n) return data;
    assert(req_comp >= 1 && req_comp <= 4);

    // Work out the output size in 64 bits so that a huge image cannot wrap
    // the allocation size to something small.
    immutable ulong pixels = cast(ulong) x * y;
    if (pixels > size_t.max / req_comp)
    {
        free(data);
        throw new STBImageException("Image too large to decode");
    }

    ubyte *good = cast(ubyte*) malloc(cast(size_t) pixels * req_comp);
    if (good == null)
    {
        free(data);
        throw new STBImageException("Out of memory");
    }

    for (uint row = 0; row < y; ++row)
    {
        // Row offsets are computed in size_t for the same reason as above.
        ubyte *src  = data + cast(size_t) row * x * img_n;
        ubyte *dest = good + cast(size_t) row * x * req_comp;
        uint i;

        // convert source image with img_n components to one with req_comp components;
        // avoid switch per pixel, so use switch per scanline
        switch (img_n * 8 + req_comp)
        {
            case 1 * 8 + 2:
                for (i = 0; i < x; ++i, src += 1, dest += 2)
                {
                    dest[0] = src[0];
                    dest[1] = 255;
                }
                break;
            case 1 * 8 + 3:
                for (i = 0; i < x; ++i, src += 1, dest += 3)
                    dest[0] = dest[1] = dest[2] = src[0];
                break;
            case 1 * 8 + 4:
                for (i = 0; i < x; ++i, src += 1, dest += 4)
                {
                    dest[0] = dest[1] = dest[2] = src[0];
                    dest[3] = 255;
                }
                break;
            case 2 * 8 + 1:
                for (i = 0; i < x; ++i, src += 2, dest += 1)
                    dest[0] = src[0];
                break;
            case 2 * 8 + 3:
                for (i = 0; i < x; ++i, src += 2, dest += 3)
                    dest[0] = dest[1] = dest[2] = src[0];
                break;
            case 2 * 8 + 4:
                for (i = 0; i < x; ++i, src += 2, dest += 4)
                {
                    dest[0] = dest[1] = dest[2] = src[0];
                    dest[3] = src[1];
                }
                break;
            case 3 * 8 + 4:
                for (i = 0; i < x; ++i, src += 3, dest += 4)
                {
                    dest[0] = src[0];
                    dest[1] = src[1];
                    dest[2] = src[2];
                    dest[3] = 255;
                }
                break;
            case 3 * 8 + 1:
                for (i = 0; i < x; ++i, src += 3, dest += 1)
                    dest[0] = compute_y(src[0], src[1], src[2]);
                break;
            case 3 * 8 + 2:
                for (i = 0; i < x; ++i, src += 3, dest += 2)
                {
                    dest[0] = compute_y(src[0], src[1], src[2]);
                    dest[1] = 255;
                }
                break;
            case 4 * 8 + 1:
                for (i = 0; i < x; ++i, src += 4, dest += 1)
                    dest[0] = compute_y(src[0], src[1], src[2]);
                break;
            case 4 * 8 + 2:
                for (i = 0; i < x; ++i, src += 4, dest += 2)
                {
                    dest[0] = compute_y(src[0], src[1], src[2]);
                    dest[1] = src[3];
                }
                break;
            case 4 * 8 + 3:
                for (i = 0; i < x; ++i, src += 4, dest += 3)
                {
                    dest[0] = src[0];
                    dest[1] = src[1];
                    dest[2] = src[2];
                }
                break;
            default: assert(0);
        }
    }

    free(data);
    return good;
}

//
//  "baseline" JPEG/JFIF decoder (not actually fully baseline implementation)
//
//    simple implementation
//      - channel subsampling of at most 2 in each dimension
//      - doesn't support delayed output of y-dimension
//      - simple interface (only one output format: 8-bit interleaved RGB)
//      - doesn't try to recover corrupt jpegs
//      - doesn't allow partial loading, loading multiple at once
//      - still fast on x86 (copying globals into locals doesn't help x86)
//      - allocates lots of intermediate memory (full size of all components)
//        - non-interleaved case requires this anyway
//        - allows good upsampling (see next)
//    high-quality
//      - upsampled channels are bilinearly interpolated, even across blocks
//      - quality integer IDCT derived from IJG's 'slow'
//    performance
//      - fast huffman; reasonable integer IDCT
//      - uses a lot of intermediate memory, could cache poorly
//      - load http://nothings.org/remote/anemones.jpg 3 times on 2.8Ghz P4
//          stb_jpeg:   1.34 seconds (MSVC6, default release build)
//          stb_jpeg:   1.06 seconds (MSVC6, processor = Pentium Pro)
//          IJL11.dll:  1.08 seconds (compiled by intel)
//          IJG 1998:   0.98 seconds (MSVC6, makefile provided by IJG)
//          IJG 1998:   0.95 seconds (MSVC6, makefile + proc=PPro)

// huffman decoding acceleration
enum FAST_BITS = 9;  // larger handles more cases; smaller stomps less cache

struct huffman
{
    ubyte[1 << FAST_BITS] fast;
    // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
    ushort[256] code;
    ubyte[256] values;
    ubyte[257] size;
    uint[18] maxcode;
    int[17] delta;   // old 'firstsymbol' - old 'firstcode'
}

struct jpeg
{
    stbi *s;
    huffman[4] huff_dc;
    huffman[4] huff_ac;
    ubyte[64][4] dequant;

    // sizes for components, interleaved MCUs
    int img_h_max, img_v_max;
    int img_mcu_x, img_mcu_y;
    int img_mcu_w, img_mcu_h;

    // definition of jpeg image component
    struct img_comp_
    {
        int id;
        int h,v;
        int tq;
        int hd,ha;
        int dc_pred;

        int x,y,w2,h2;
        ubyte *data;
        void *raw_data;
        ubyte *linebuf;
    }

    img_comp_[4] img_comp;

    uint code_buffer;   // jpeg entropy-coded buffer
    int code_bits;      // number of valid bits
    ubyte marker;       // marker seen while filling entropy buffer
    int nomore;         // flag if we saw a marker so must stop

    int scan_n;
    int[4] order;
    int restart_interval, todo;
}


/// Builds the decoding tables of `h` from `count`, where `count[i]` is the
/// number of codes of length `i+1` (16 entries).
///
/// Returns: 1 on success.
/// Throws: STBImageException when the counts describe more than 256 symbols
/// or an impossible code assignment.
int build_huffman(huffman *h, int *count)
{
    int i,j,k=0,code;

    // The tables hold at most 256 symbols (plus a terminator in size[]);
    // reject headers that would write past them.
    int total = 0;
    for (i=0; i < 16; ++i)
    {
        if (count[i] < 0)
            throw new STBImageException("Bad code lengths, corrupt JPEG");
        total += count[i];
    }
    if (total > 256)
        throw new STBImageException("Bad code lengths, corrupt JPEG");

    // build size list for each symbol (from JPEG spec)
    for (i=0; i < 16; ++i)
        for (j=0; j < count[i]; ++j)
            h.size[k++] = cast(ubyte) (i+1);
    h.size[k] = 0;

    // compute actual symbols (from jpeg spec)
    code = 0;
    k = 0;
    for(j=1; j <= 16; ++j) {
        // compute delta to add to code to compute symbol id
        h.delta[j] = k - code;
        if (h.size[k] == j) {
            while (h.size[k] == j)
                h.code[k++] = cast(ushort) (code++);
            if (code-1 >= (1 << j))
                throw new STBImageException("Bad code lengths, corrupt JPEG");
        }
        // compute largest code + 1 for this size, preshifted as needed later
        h.maxcode[j] = code << (16-j);
        code <<= 1;
    }
    // j == 17 here: sentinel that terminates the search loop in decode()
    h.maxcode[j] = 0xffffffff;

    // build non-spec acceleration table; 255 is flag for not-accelerated
    h.fast[] = 255;
    for (i=0; i < k; ++i) {
        int s = h.size[i];
        if (s <= FAST_BITS) {
            int c = h.code[i] << (FAST_BITS-s);
            int m = 1 << (FAST_BITS-s);
            for (j=0; j < m; ++j) {
                h.fast[c+j] = cast(ubyte) i;
            }
        }
    }
    return 1;
}

/// Tops up the entropy bit buffer one byte at a time until it holds more
/// than 24 bits. A 0xff byte followed by a non-zero byte is recorded as a
/// marker and stops any further reading (zeros are fed from then on).
void grow_buffer_unsafe(jpeg *j)
{
    do {
        int b = j.nomore ? 0 : get8(j.s);
        if (b == 0xff) {
            int c = get8(j.s);
            if (c != 0) {
                j.marker = cast(ubyte) c;
                j.nomore = 1;
                return;
            }
        }
        j.code_buffer |= b << (24 - j.code_bits);
        j.code_bits += 8;
    } while (j.code_bits <= 24);
}

// (1 << n) - 1
static immutable uint[17] bmask=[0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535];

/// Decodes one huffman symbol from the bitstream.
/// Returns: the symbol value, or -1 when no valid code could be decoded.
int decode(jpeg *j, huffman *h)
{
    uint temp;
    int c,k;

    if (j.code_bits < 16) grow_buffer_unsafe(j);

    // look at the top FAST_BITS and determine what symbol ID it is,
    // if the code is <= FAST_BITS
    c = (j.code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
    k = h.fast[c];
    if (k < 255) {
        int s = h.size[k];
        if (s > j.code_bits)
            return -1;
        j.code_buffer <<= s;
        j.code_bits -= s;
        return h.values[k];
    }

    // naive test is to shift the code_buffer down so k bits are
    // valid, then test against maxcode. To speed this up, we've
    // preshifted maxcode left so that it has (16-k) 0s at the
    // end; in other words, regardless of the number of bits, it
    // wants to be compared against something shifted to have 16;
    // that way we don't need to shift inside the loop.
    temp = j.code_buffer >> 16;
    for (k=FAST_BITS+1 ; ; ++k)
        if (temp < h.maxcode[k])
            break;
    if (k == 17) {
        // error! code not found
        j.code_bits -= 16;
        return -1;
    }

    if (k > j.code_bits)
        return -1;

    // convert the huffman code to the symbol id
    c = ((j.code_buffer >> (32 - k)) & bmask[k]) + h.delta[k];
    // a corrupt stream can yield an id outside the 256-entry tables
    if (c < 0 || c >= 256)
        return -1;
    assert((((j.code_buffer) >> (32 - h.size[c])) & bmask[h.size[c]]) == h.code[c]);

    // convert the id to a symbol
    j.code_bits -= k;
    j.code_buffer <<= k;
    return h.values[c];
}

// combined JPEG 'receive' and JPEG 'extend', since baseline
// always extends everything it receives.
int extend_receive(jpeg *j, int n)
{
    uint m = 1 << (n-1);
    uint k;
    if (j.code_bits < n) grow_buffer_unsafe(j);

    // rotate the top n bits down to the bottom, then split them off
    k = stbi_lrot(j.code_buffer, n);
    j.code_buffer = k & ~bmask[n];
    k &= bmask[n];
    j.code_bits -= n;

    // the following test is probably a random branch that won't
    // predict well. I tried to table accelerate it but failed.
    // maybe it's compiling as a conditional move?
    if (k < m)
        return (-1 << n) + k + 1;
    else
        return k;
}

// given a value that's at position X in the zigzag stream,
// where does it appear in the 8x8 matrix coded as row-major?
static immutable ubyte[64+15] dezigzag =
[
    0,  1,  8, 16,  9,  2,  3, 10,
   17, 24, 32, 25, 18, 11,  4,  5,
   12, 19, 26, 33, 40, 48, 41, 34,
   27, 20, 13,  6,  7, 14, 21, 28,
   35, 42, 49, 56, 57, 50, 43, 36,
   29, 22, 15, 23, 30, 37, 44, 51,
   58, 59, 52, 45, 38, 31, 39, 46,
   53, 60, 61, 54, 47, 55, 62, 63,
   // let corrupt input sample past end
   63, 63, 63, 63, 63, 63, 63, 63,
   63, 63, 63, 63, 63, 63, 63
];

/// Decodes one 64-entry coefficient block for component `b` into `data`.
///
/// `data` is taken by reference: a static array parameter is copied in D, so
/// by-value passing would discard every decoded coefficient.
/// Returns: 1 on success.
/// Throws: STBImageException on an invalid huffman code.
int decode_block(jpeg *j, ref short[64] data, huffman *hdc, huffman *hac, int b)
{
    int diff,dc,k;
    int t = decode(j, hdc);
    // t is used as a bit count and as an index into bmask; anything above
    // 15 can only come from a corrupt table.
    if (t < 0 || t > 15)
        throw new STBImageException("Bad huffman code, corrupt JPEG");

    // 0 all the ac values now
    data[] = 0;

    diff = t ? extend_receive(j, t) : 0;
    dc = j.img_comp[b].dc_pred + diff;
    j.img_comp[b].dc_pred = dc;
    data[0] = cast(short) dc;

    // decode AC components, see JPEG spec
    k = 1;
    do {
        int r,s;
        int rs = decode(j, hac);
        if (rs < 0)
            throw new STBImageException("Bad huffman code, corrupt JPEG");
        s = rs & 15;
        r = rs >> 4;
        if (s == 0) {
            if (rs != 0xf0) break; // end block
            k += 16;
        } else {
            k += r;
            // decode into unzigzag'd location
            data[dezigzag[k++]] = cast(short) extend_receive(j,s);
        }
    } while (k < 64);
    return 1;
}

/// Clamps `x` to 0..255.
ubyte clamp(int x)
{
    // trick to use a single test to catch both cases
    if (cast(uint) x > 255) {
        if (x < 0) return 0;
        if (x > 255) return 255;
    }
    return cast(ubyte) x;
}

/// Converts a floating-point constant to fixed point with 12 fractional bits.
int f2f(double x)
{
    return cast(int)(x * 4096 + 0.5);
}

/// Scales an integer to the same 12-bit fixed-point format as f2f.
int fsh(int x)
{
    return x << 12;
}

// derived from jidctint -- DCT_ISLOW
/// One 1-D pass of the inverse DCT over inputs s0..s7. The even part is
/// returned in x0..x3 and the odd part in t0..t3; callers combine them.
void IDCT_1D(int s0, int s1, int s2, int s3, int s4, int s5, int s6, int s7,
             out int t0, out int t1, out int t2, out int t3,
             out int x0, out int x1, out int x2, out int x3)
{
    int p1,p2,p3,p4,p5;
    p2 = s2;
    p3 = s6;
    p1 = (p2+p3) * f2f(0.5411961f);
    t2 = p1 + p3*f2f(-1.847759065f);
    t3 = p1 + p2*f2f( 0.765366865f);
    p2 = s0;
    p3 = s4;
    t0 = fsh(p2+p3);
    t1 = fsh(p2-p3);
    x0 = t0+t3;
    x3 = t0-t3;
    x1 = t1+t2;
    x2 = t1-t2;
    t0 = s7;
    t1 = s5;
    t2 = s3;
    t3 = s1;
    p3 = t0+t2;
    p4 = t1+t3;
    p1 = t0+t3;
    p2 = t1+t2;
    p5 = (p3+p4)*f2f( 1.175875602f);
    t0 = t0*f2f( 0.298631336f);
    t1 = t1*f2f( 2.053119869f);
    t2 = t2*f2f( 3.072711026f);
    t3 = t3*f2f( 1.501321110f);
    p1 = p5 + p1*f2f(-0.899976223f);
    p2 = p5 + p2*f2f(-2.562915447f);
    p3 = p3*f2f(-1.961570560f);
    p4 = p4*f2f(-0.390180644f);
    t3 += p1+p4;
    t2 += p2+p3;
    t1 += p2+p4;
    t0 += p1+p3;
}

alias stbi_dequantize_t = ubyte;

// .344 seconds on 3*anemones.jpg
/// Dequantizes `data` with `dequantize`, applies the 2-D inverse DCT and
/// writes the resulting 8x8 samples to `out_`, `out_stride` bytes per row.
void idct_block(ubyte *out_, int out_stride, short[64] data, stbi_dequantize_t *dequantize)
{
    int i;
    int[64] val;
    int*v = val.ptr;
    stbi_dequantize_t *dq = dequantize;
    ubyte *o;
    short *d = data.ptr;

    // columns
    for (i=0; i < 8; ++i,++d,++dq, ++v) {
        // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
        if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
            && d[40]==0 && d[48]==0 && d[56]==0) {
            //    no shortcut                 0     seconds
            //    (1|2|3|4|5|6|7)==0          0     seconds
            //    all separate               -0.047 seconds
            //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
            int dcterm = d[0] * dq[0] << 2;
            v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
        } else {
            int t0, t1, t2, t3, x0, x1, x2, x3;
            IDCT_1D(d[ 0]*dq[ 0],d[ 8]*dq[ 8],d[16]*dq[16],d[24]*dq[24],
                    d[32]*dq[32],d[40]*dq[40],d[48]*dq[48],d[56]*dq[56],
                    t0, t1, t2, t3, x0, x1, x2, x3);
            // constants scaled things up by 1<<12; let's bring them back
            // down, but keep 2 extra bits of precision
            x0 += 512; x1 += 512; x2 += 512; x3 += 512;
            v[ 0] = (x0+t3) >> 10;
            v[56] = (x0-t3) >> 10;
            v[ 8] = (x1+t2) >> 10;
            v[48] = (x1-t2) >> 10;
            v[16] = (x2+t1) >> 10;
            v[40] = (x2-t1) >> 10;
            v[24] = (x3+t0) >> 10;
            v[32] = (x3-t0) >> 10;
        }
    }

    // rows
    for (i=0, v=val.ptr, o=out_; i < 8; ++i,v+=8,o+=out_stride) {

        // no fast case since the first 1D IDCT spread components out
        int t0, t1, t2, t3, x0, x1, x2, x3;
        IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7], t0, t1, t2, t3, x0, x1, x2, x3);
        // constants scaled things up by 1<<12, plus we had 1<<2 from first
        // loop, plus horizontal and vertical each scale by sqrt(8) so together
        // we've got an extra 1<<3, so 1<<17 total we need to remove.
        // so we want to round that, which means adding 0.5 * 1<<17,
        // aka 65536. Also, we'll end up with -128 to 127 that we want
        // to encode as 0..255 by adding 128, so we'll add that before the shift
        x0 += 65536 + (128<<17);
        x1 += 65536 + (128<<17);
        x2 += 65536 + (128<<17);
        x3 += 65536 + (128<<17);
        // tried computing the shifts into temps, or'ing the temps to see
        // if any were out of range, but that was slower
        o[0] = clamp((x0+t3) >> 17);
        o[7] = clamp((x0-t3) >> 17);
        o[1] = clamp((x1+t2) >> 17);
        o[6] = clamp((x1-t2) >> 17);
        o[2] = clamp((x2+t1) >> 17);
        o[5] = clamp((x2-t1) >> 17);
        o[3] = clamp((x3+t0) >> 17);
        o[4] = clamp((x3-t0) >> 17);
    }
}


enum MARKER_none = 0xff;

// if there's a pending marker from the entropy stream, return that
// otherwise, fetch from the stream and get a marker. if there's no
// marker, return 0xff, which is never a valid marker value
ubyte get_marker(jpeg *j)
{
    ubyte x;
    if (j.marker != MARKER_none)
    {
        x = j.marker;
        j.marker = MARKER_none;
        return x;
    }
    x = get8u(j.s);
    if (x != 0xff) return MARKER_none;
    while (x == 0xff)
        x = get8u(j.s);
    return x;
}

// in each scan, we'll have scan_n components, and the order
// of the components is specified by order[]
/// True when `x` is one of the restart markers 0xd0..0xd7.
bool RESTART(int x)
{
    return (x >= 0xd0) && (x <= 0xd7);
}

// after a restart interval, reset the entropy decoder and
// the dc prediction
void reset(jpeg *j)
{
    j.code_bits = 0;
    j.code_buffer = 0;
    j.nomore = 0;
    j.img_comp[0].dc_pred = j.img_comp[1].dc_pred = j.img_comp[2].dc_pred = 0;
    j.marker = MARKER_none;
    j.todo = j.restart_interval ? j.restart_interval : 0x7fffffff;
    // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
    // since we don't even allow 1<<30 pixels
}

/// Decodes the entropy-coded data of the current scan into the component
/// buffers. Returns 1 (also when decoding stops early at a missing restart
/// marker), 0 if a block fails to decode.
int parse_entropy_coded_data(jpeg *z)
{
    reset(z);
    if (z.scan_n == 1) {
        int i,j;
        short[64] data;
        int n = z.order[0];
        // non-interleaved data, we just need to process one block at a time,
        // in trivial scanline order
        // number of blocks to do just depends on how many actual "pixels" this
        // component has, independent of interleaved MCU blocking and such
        int w = (z.img_comp[n].x+7) >> 3;
        int h = (z.img_comp[n].y+7) >> 3;
        for (j=0; j < h; ++j) {
            for (i=0; i < w; ++i) {
                if (!decode_block(z, data, z.huff_dc.ptr+z.img_comp[n].hd, z.huff_ac.ptr+z.img_comp[n].ha, n)) return 0;
                idct_block(z.img_comp[n].data+z.img_comp[n].w2*j*8+i*8, z.img_comp[n].w2, data, z.dequant[z.img_comp[n].tq].ptr);
                // every data block is an MCU, so countdown the restart interval
                if (--z.todo <= 0) {
                    if (z.code_bits < 24) grow_buffer_unsafe(z);
                    // if it's NOT a restart, then just bail, so we get corrupt data
                    // rather than no data
                    if (!RESTART(z.marker)) return 1;
                    reset(z);
                }
            }
        }
    } else { // interleaved!
        int i,j,k,x,y;
        short[64] data;
        for (j=0; j < z.img_mcu_y; ++j) {
            for (i=0; i < z.img_mcu_x; ++i) {
                // scan an interleaved mcu... process scan_n components in order
                for (k=0; k < z.scan_n; ++k) {
                    int n = z.order[k];
                    // scan out an mcu's worth of this component; that's just determined
                    // by the basic H and V specified for the component
                    for (y=0; y < z.img_comp[n].v; ++y) {
                        for (x=0; x < z.img_comp[n].h; ++x) {
                            int x2 = (i*z.img_comp[n].h + x)*8;
                            int y2 = (j*z.img_comp[n].v + y)*8;
                            if (!decode_block(z, data, z.huff_dc.ptr+z.img_comp[n].hd, z.huff_ac.ptr+z.img_comp[n].ha, n)) return 0;
                            idct_block(z.img_comp[n].data+z.img_comp[n].w2*y2+x2, z.img_comp[n].w2, data, z.dequant[z.img_comp[n].tq].ptr);
                        }
                    }
                }
                // after all interleaved components, that's an interleaved MCU,
                // so now count down the restart interval
                if (--z.todo <= 0) {
                    if (z.code_bits < 24) grow_buffer_unsafe(z);
                    // if it's NOT a restart, then just bail, so we get corrupt data
                    // rather than no data
                    if (!RESTART(z.marker)) return 1;
                    reset(z);
                }
            }
        }
    }
    return 1;
}

/// Handles the marker segment `m` (DRI, DQT, DHT, APPn, COM).
/// Returns: 1 when the segment was consumed, 0 for an unhandled marker or a
/// table segment whose length does not add up.
/// Throws: STBImageException on malformed or unsupported segments.
int process_marker(jpeg *z, int m)
{
    int L;
    switch (m) {

        case MARKER_none: // no marker found
            throw new STBImageException("Expected marker, corrupt JPEG");

        case 0xC2: // SOF - progressive
            throw new STBImageException("JPEG format not supported (progressive)");

        case 0xDD: // DRI - specify restart interval
            if (get16(z.s) != 4)
                throw new STBImageException("Bad DRI len, corrupt JPEG");
            z.restart_interval = get16(z.s);
            return 1;

        case 0xDB: // DQT - define quantization table
            L = get16(z.s)-2;
            while (L > 0) {
                int q = get8(z.s);
                int p = q >> 4;
                int t = q & 15;
                int i;
                if (p != 0)
                    throw new STBImageException("Bad DQT type, corrupt JPEG");
                if (t > 3)
                    throw new STBImageException("Bad DQT table, corrupt JPEG");
                for (i=0; i < 64; ++i)
                    z.dequant[t][dezigzag[i]] = get8u(z.s);
                L -= 65;
            }
            return L==0;

        case 0xC4: // DHT - define huffman table
            L = get16(z.s)-2;
            while (L > 0) {
                ubyte *v;
                int[16] sizes;
                int i;
                int m_ = 0;
                int q = get8(z.s);
                int tc = q >> 4;
                int th = q & 15;
                if (tc > 1 || th > 3)
                    throw new STBImageException("Bad DHT header, corrupt JPEG");
                for (i=0; i < 16; ++i) {
                    sizes[i] = get8(z.s);
                    m_ += sizes[i];
                }
                // The symbol table holds 256 entries and is filled through a
                // raw pointer below, so an oversized count must be rejected.
                if (m_ > 256)
                    throw new STBImageException("Bad DHT header, corrupt JPEG");
                L -= 17;
                if (tc == 0) {
                    if (!build_huffman(z.huff_dc.ptr+th, sizes.ptr)) return 0;
                    v = z.huff_dc[th].values.ptr;
                } else {
                    if (!build_huffman(z.huff_ac.ptr+th, sizes.ptr)) return 0;
                    v = z.huff_ac[th].values.ptr;
                }
                for (i=0; i < m_; ++i)
                    v[i] = get8u(z.s);
                L -= m_;
            }
            return L==0;

        default:
            break;
    }
    // check for comment block or APP blocks
    if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
        int len = get16(z.s);
        // the length field counts its own two bytes
        if (len < 2)
            throw new STBImageException("Bad APP/COM len, corrupt JPEG");
        skip(z.s, len-2);
        return 1;
    }
    return 0;
}

// after we see SOS
/// Parses the scan header: which components are in the scan and which
/// huffman tables each one uses.
/// Returns: 1 on success, 0 when a component id is unknown.
/// Throws: STBImageException on malformed headers.
int process_scan_header(jpeg *z)
{
    int i;
    int Ls = get16(z.s);
    z.scan_n = get8(z.s);
    if (z.scan_n < 1 || z.scan_n > 4 || z.scan_n > cast(int) z.s.img_n)
        throw new STBImageException("Bad SOS component count, Corrupt JPEG");

    if (Ls != 6+2*z.scan_n)
        throw new STBImageException("Bad SOS length, Corrupt JPEG");

    for (i=0; i < z.scan_n; ++i) {
        int id = get8(z.s), which;
        int q = get8(z.s);
        for (which = 0; which < z.s.img_n; ++which)
            if (z.img_comp[which].id == id)
                break;
        if (which == z.s.img_n) return 0;
        z.img_comp[which].hd = q >> 4;
        if (z.img_comp[which].hd > 3)
            throw new STBImageException("Bad DC huff, Corrupt JPEG");
        z.img_comp[which].ha = q & 15;
        if (z.img_comp[which].ha > 3)
            throw new STBImageException("Bad AC huff, Corrupt JPEG");
        z.order[i] = which;
    }
    if (get8(z.s) != 0)
        throw new STBImageException("Bad SOS, Corrupt JPEG");
    get8(z.s); // should be 63, but might be 0
    if (get8(z.s) != 0)
        throw new STBImageException("Bad SOS, Corrupt JPEG");

    return 1;
}

/// Parses the frame header (image size, components, sampling factors) and,
/// when `scan` is SCAN_load, allocates the per-component decode buffers.
/// Returns: 1 on success.
/// Throws: STBImageException on malformed/unsupported headers or when an
/// allocation fails.
int process_frame_header(jpeg *z, int scan)
{
    stbi *s = z.s;
    int Lf,p,i,q, h_max=1,v_max=1,c;

    Lf = get16(s);
    if (Lf < 11)
        throw new STBImageException("Bad SOF len, Corrupt JPEG");
    p = get8(s);
    if (p != 8)
        throw new STBImageException("JPEG format not supported: 8-bit only"); // JPEG baseline
    s.img_y = get16(s);
    if (s.img_y == 0)
        throw new STBImageException("No header height, JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
    s.img_x = get16(s);
    if (s.img_x == 0)
        throw new STBImageException("0 width, corrupt JPEG"); // JPEG requires
    c = get8(s);
    if (c != 3 && c != 1)
        throw new STBImageException("Bad component count, corrupt JPEG");    // JFIF requires
    s.img_n = c;
    for (i=0; i < c; ++i) {
        z.img_comp[i].data = null;
        z.img_comp[i].linebuf = null;
    }

    if (Lf != 8+3*s.img_n)
        throw new STBImageException("Bad SOF len, corrupt JPEG");

    for (i=0; i < s.img_n; ++i) {
        z.img_comp[i].id = get8(s);
        if (z.img_comp[i].id != i+1)   // JFIF requires
            if (z.img_comp[i].id != i)  // some version of jpegtran outputs non-JFIF-compliant files!
                throw new STBImageException("Bad component ID, corrupt JPEG");
        q = get8(s);
        z.img_comp[i].h = (q >> 4);
        if (!z.img_comp[i].h || z.img_comp[i].h > 4)
            throw new STBImageException("Bad H, corrupt JPEG");
        z.img_comp[i].v = q & 15;
        if (!z.img_comp[i].v || z.img_comp[i].v > 4)
            throw new STBImageException("Bad V, corrupt JPEG");
        z.img_comp[i].tq = get8(s);
        if (z.img_comp[i].tq > 3)
            throw new STBImageException("Bad TQ, corrupt JPEG");
    }

    if (scan != SCAN_load) return 1;

    if ((1 << 30) / s.img_x / s.img_n < s.img_y)
        throw new STBImageException("Image too large to decode");

    for (i=0; i < s.img_n; ++i) {
        if (z.img_comp[i].h > h_max) h_max = z.img_comp[i].h;
        if (z.img_comp[i].v > v_max) v_max = z.img_comp[i].v;
    }

    // compute interleaved mcu info
    z.img_h_max = h_max;
    z.img_v_max = v_max;
    z.img_mcu_w = h_max * 8;
    z.img_mcu_h = v_max * 8;
    z.img_mcu_x = (s.img_x + z.img_mcu_w-1) / z.img_mcu_w;
    z.img_mcu_y = (s.img_y + z.img_mcu_h-1) / z.img_mcu_h;

    for (i=0; i < s.img_n; ++i) {
        // number of effective pixels (e.g. for non-interleaved MCU)
        z.img_comp[i].x = (s.img_x * z.img_comp[i].h + h_max-1) / h_max;
        z.img_comp[i].y = (s.img_y * z.img_comp[i].v + v_max-1) / v_max;
        // to simplify generation, we'll allocate enough memory to decode
        // the bogus oversized data from using interleaved MCUs and their
        // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
        // discard the extra data until colorspace conversion
        z.img_comp[i].w2 = z.img_mcu_x * z.img_comp[i].h * 8;
        z.img_comp[i].h2 = z.img_mcu_y * z.img_comp[i].v * 8;
        z.img_comp[i].raw_data = malloc(z.img_comp[i].w2 * z.img_comp[i].h2+15);
        if (z.img_comp[i].raw_data == null) {
            // release the buffers of the components allocated so far
            for(--i; i >= 0; --i) {
                free(z.img_comp[i].raw_data);
                z.img_comp[i].data = null;
            }
            throw new STBImageException("Out of memory");
        }
        // align blocks for installable-idct using mmx/sse
        z.img_comp[i].data = cast(ubyte*) (( cast(size_t) z.img_comp[i].raw_data + 15) & ~15);
        z.img_comp[i].linebuf = null;
    }

    return 1;
}

// use comparisons since in some cases we handle more than one case (e.g. SOF)
bool DNL(int x) { return x == 0xdc; }
bool SOI(int x) { return x == 0xd8; }
bool EOI(int x) { return x == 0xd9; }
bool SOF(int x) { return x == 0xc0 || x == 0xc1; }
bool SOS(int x) { return x == 0xda; }

/// Reads markers up to and including the frame header.
/// With SCAN_type only the SOI marker is checked.
/// Returns: 1 on success, 0 when a marker segment could not be processed.
/// Throws: STBImageException on a missing SOI/SOF or a malformed segment.
int decode_jpeg_header(jpeg *z, int scan)
{
    int m;
    z.marker = MARKER_none; // initialize cached marker to empty
    m = get_marker(z);
    if (!SOI(m))
        throw new STBImageException("No SOI, corrupt JPEG");
    if (scan == SCAN_type) return 1;
    m = get_marker(z);
    while (!SOF(m))
    {
        if (!process_marker(z,m)) return 0;
        m = get_marker(z);

        while (m == MARKER_none)
        {
            // some files have extra padding after their blocks, so ok, we'll scan
            if (at_eof(z.s))
                throw new STBImageException("No SOF, corrupt JPEG");
            m = get_marker(z);
        }
    }
    if (!process_frame_header(z, scan)) return 0;
    return 1;
}

/// Decodes the whole JPEG stream (header plus every scan up to EOI) into
/// the component buffers of `j`.
/// Returns: 1 on success, 0 on failures reported by return value.
/// Throws: STBImageException on corrupt data.
int decode_jpeg_image(jpeg *j)
{
    // Corrupt data is reported by throwing, which would bypass the caller's
    // cleanup on a 0 return; release the component buffers here so they are
    // not leaked. cleanup_jpeg nulls what it frees, so running it again
    // later is harmless.
    scope(failure) cleanup_jpeg(j);

    int m;
    j.restart_interval = 0;
    if (!decode_jpeg_header(j, SCAN_load)) return 0;
    m = get_marker(j);
    while (!EOI(m)) {
        if (SOS(m)) {
            if (!process_scan_header(j)) return 0;
            if (!parse_entropy_coded_data(j)) return 0;
            if (j.marker == MARKER_none ) {
                // handle 0s at the end of image data from IP Kamera 9060
                while (!at_eof(j.s)) {
                    int x = get8(j.s);
                    if (x == 255) {
                        j.marker = get8u(j.s);
                        break;
                    } else if (x != 0) {
                        return 0;
                    }
                }
                // if we reach eof without hitting a marker, get_marker() below will fail and we'll eventually return 0
            }
        } else {
            if (!process_marker(j, m)) return 0;
        }
        m = get_marker(j);
    }
    return 1;
}

// static jfif-centered resampling (across block boundaries)

alias resample_row_func = ubyte* function(ubyte *out_, ubyte *in0, ubyte *in1, int w, int hs);

ubyte div4(int x)
{
    return cast(ubyte)(x >> 2);
}

/// No resampling needed: returns the near input row unchanged.
ubyte *resample_row_1(ubyte *out_, ubyte *in_near, ubyte *in_far, int w, int hs)
{
    return in_near;
}

ubyte* resample_row_v_2(ubyte *out_, ubyte *in_near, ubyte *in_far, int w, int hs)
{
    // need to generate two samples vertically for every one in input
    int i;
    for (i=0; i < w; ++i)
        out_[i] = div4(3*in_near[i] + in_far[i] + 2);
    return out_;
}

ubyte* resample_row_h_2(ubyte *out_, ubyte *in_near, ubyte *in_far, int w, int hs)
{
    // need to generate two samples horizontally for every one in input
    int i;
    ubyte *input = in_near;

    if (w == 1) {
        // if only one sample, can't do any interpolation
        out_[0] = out_[1] = input[0];
        return out_;
    }

    out_[0] = input[0];
    out_[1] = div4(input[0]*3 + input[1] + 2);
    for (i=1; i < w-1; ++i) {
        int n = 3*input[i]+2;
        out_[i*2+0] = div4(n+input[i-1]);
        out_[i*2+1] = div4(n+input[i+1]);
    }
    // i == w-1 here: the last input sample has no right-hand neighbour
    out_[i*2+0] = div4(input[w-2]*3 + input[w-1] + 2);
    out_[i*2+1] = input[w-1];

    return out_;
}

ubyte div16(int x)
{
    return cast(ubyte)(x >> 4);
}


ubyte *resample_row_hv_2(ubyte *out_, ubyte *in_near, ubyte *in_far, int w, int hs)
{
    // need to generate 2x2 samples for every one in input
    int i,t0,t1;
    if (w == 1) {
        out_[0] = out_[1] = div4(3*in_near[0] + in_far[0] + 2);
        return out_;
    }

    t1 = 3*in_near[0] + in_far[0];
    out_[0] = div4(t1+2);
    for (i=1; i < w; ++i) {
        t0 = t1;
        t1 = 3*in_near[i]+in_far[i];
        out_[i*2-1] = div16(3*t0 + t1 + 8);
        out_[i*2  ] = div16(3*t1 + t0 + 8);
    }
    out_[w*2-1] = div4(t1+2);

    return out_;
}

ubyte *resample_row_generic(ubyte *out_, ubyte *in_near, ubyte *in_far, int w, int hs)
{
    // resample with nearest-neighbor; in_far is unused
    int i,j;
    for (i=0; i < w; ++i)
        for (j=0; j < hs; ++j)
            out_[i*hs+j] = in_near[i];
    return out_;
}

/// Converts a floating-point constant to fixed point with 16 fractional bits.
int float2fixed(double x)
{
    return cast(int)((x) * 65536 + 0.5);
}

// 0.38 seconds on 3*anemones.jpg   (0.25 with processor = Pro)
// VC6 without processor=Pro is generating multiple LEAs per multiply!
/// Converts `count` YCbCr samples to RGB using 16.16 fixed-point arithmetic.
/// Four bytes (R, G, B, 255) are stored per pixel and `out_` then advances by
/// `step`, so with a step of 3 the alpha byte is overwritten by the next pixel.
void YCbCr_to_RGB_row(ubyte *out_, const ubyte *y, const ubyte *pcb, const ubyte *pcr, int count, int step)
{
    // conversion coefficients in 16.16 fixed point
    immutable int crToR = float2fixed(1.40200f);
    immutable int crToG = float2fixed(0.71414f);
    immutable int cbToG = float2fixed(0.34414f);
    immutable int cbToB = float2fixed(1.77200f);

    // saturate a value to the 0..255 range
    static ubyte clampToByte(int v)
    {
        if (v < 0) return 0;
        if (v > 255) return 255;
        return cast(ubyte) v;
    }

    foreach (px; 0 .. count)
    {
        immutable int luma = (y[px] << 16) + 32768; // + one half, for rounding
        immutable int cr = pcr[px] - 128;
        immutable int cb = pcb[px] - 128;

        immutable int r = (luma + cr*crToR) >> 16;
        immutable int g = (luma - cr*crToG - cb*cbToG) >> 16;
        immutable int b = (luma + cb*cbToB) >> 16;

        out_[0] = clampToByte(r);
        out_[1] = clampToByte(g);
        out_[2] = clampToByte(b);
        out_[3] = 255;
        out_ += step;
    }
}

/// Releases the temporary per-component buffers of `j`: for each of the
/// `j.s.img_n` components, `raw_data` is freed when `data` is set (and `data`
/// is cleared), and `linebuf` is freed and cleared when set.
void cleanup_jpeg(jpeg *j)
{
    foreach (c; 0 .. j.s.img_n)
    {
        auto component = &j.img_comp[c];

        if (component.data)
        {
            free(component.raw_data);
            component.data = null;
        }

        if (component.linebuf)
        {
            free(component.linebuf);
            component.linebuf = null;
        }
    }
}

/// Per-component state used while resampling a JPEG to full resolution.
struct stbi_resample
{
    resample_row_func resample; // row resampler selected for this component
    ubyte* line0;
    ubyte* line1;
    int hs,vs;   // expansion factor in each axis
    int w_lores; // horizontal pixels pre-expansion
    int ystep;   // how far through vertical expansion we are
    int ypos;    // which pre-expansion row we're on
}

ubyte *load_jpeg_image(jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
{
    int n, decode_n;
    // validate req_comp
    if (req_comp < 0 || req_comp > 4)
        throw new STBImageException("Internal error: bad req_comp");
    z.s.img_n = 0;

    // load a jpeg image from whichever source
    if (!decode_jpeg_image(z)) { cleanup_jpeg(z); return null; }

    // determine actual number of components to generate
    n =
req_comp ? req_comp : z.s.img_n;

    // a 3-component image that is only wanted as grey (n < 3) needs just its
    // first component decoded
    if (z.s.img_n == 3 && n < 3)
        decode_n = 1;
    else
        decode_n = z.s.img_n;

    // resample and color-convert
    {
        int k;
        uint i,j;
        ubyte *output;
        ubyte *coutput[4];       // resampled row of each component for the current output row

        stbi_resample res_comp[4];

        // set up one resampler per decoded component
        for (k=0; k < decode_n; ++k) {
            stbi_resample *r = &res_comp[k];

            // allocate line buffer big enough for upsampling off the edges
            // with upsample factor of 4
            z.img_comp[k].linebuf = cast(ubyte*) malloc(z.s.img_x + 3);
            if (!z.img_comp[k].linebuf)
            {
                cleanup_jpeg(z);
                throw new STBImageException("Out of memory");
            }

            // expansion factors relative to the most finely sampled component
            r.hs      = z.img_h_max / z.img_comp[k].h;
            r.vs      = z.img_v_max / z.img_comp[k].v;
            r.ystep   = r.vs >> 1;
            r.w_lores = (z.s.img_x + r.hs-1) / r.hs;
            r.ypos    = 0;
            r.line0   = r.line1 = z.img_comp[k].data;

            // pick the specialised resampler when one exists for (hs, vs)
            if      (r.hs == 1 && r.vs == 1) r.resample = &resample_row_1;
            else if (r.hs == 1 && r.vs == 2) r.resample = &resample_row_v_2;
            else if (r.hs == 2 && r.vs == 1) r.resample = &resample_row_h_2;
            else if (r.hs == 2 && r.vs == 2) r.resample = &resample_row_hv_2;
            else                             r.resample = &resample_row_generic;
        }

        // last allocation of the function; the extra byte absorbs the alpha
        // store that YCbCr_to_RGB_row performs past the final pixel when n == 3
        output = cast(ubyte*) malloc(n * z.s.img_x * z.s.img_y + 1);
        if (!output) { cleanup_jpeg(z); throw new STBImageException("Out of memory"); }

        // now go ahead and resample
        for (j=0; j < z.s.img_y; ++j) {
            ubyte *out_ = output + n * z.s.img_x * j;
            for (k=0; k < decode_n; ++k) {
                stbi_resample *r = &res_comp[k];
                // in the second half of a vertical step line1 is passed as the
                // "near" row, otherwise line0 is
                int y_bot = r.ystep >= (r.vs >> 1);
                coutput[k] = r.resample(z.img_comp[k].linebuf,
                                        y_bot ? r.line1 : r.line0,
                                        y_bot ? r.line0 : r.line1,
                                        r.w_lores, r.hs);
                // after vs output rows, move on to the next source row; line1
                // stops advancing once ypos reaches the component's row count
                if (++r.ystep >= r.vs) {
                    r.ystep = 0;
                    r.line0 = r.line1;
                    if (++r.ypos < z.img_comp[k].y)
                        r.line1 += z.img_comp[k].w2;
                }
            }
            if (n >= 3) {
                ubyte *y = coutput[0];
                if (z.s.img_n == 3) {
                    YCbCr_to_RGB_row(out_, y, coutput[1], coutput[2], z.s.img_x, n);
                } else
                    // non-3-component source: replicate the first component into R, G and B
                    for (i=0; i < z.s.img_x; ++i) {
                        out_[0] = out_[1] = out_[2] = y[i];
                        out_[3] = 255; // not used if n==3
                        out_ += n;
                    }
            } else {
                ubyte *y = coutput[0];
                if (n == 1)
                    for (i=0; i < z.s.img_x; ++i) out_[i] = y[i];
                else
                    // grey + alpha: alpha is always opaque
                    for (i=0; i < z.s.img_x; ++i) *out_++ = y[i], *out_++ = 255;
            }
        }
        cleanup_jpeg(z);
        *out_x = z.s.img_x;
        *out_y = z.s.img_y;
        if (comp) *comp  = z.s.img_n; // report original components, not output
        return output;
    }
}

/// Decodes a JPEG from `s` through load_jpeg_image, using a fresh `jpeg`
/// context bound to `s`.
ubyte* stbi_jpeg_load(stbi *s, int *x, int *y, int *comp, int req_comp)
{
    jpeg j;
    j.s = s;
    return load_jpeg_image(&j, x,y,comp,req_comp);
}

/// Throws STBImageException when decode_jpeg_header(SCAN_type) reports
/// failure for the data in `s`.
void stbi_jpeg_test(stbi *s)
{
    jpeg j;
    j.s = s;
    int r = decode_jpeg_header(&j, SCAN_type);
    if (r == 0)
        throw new STBImageException("Couldn't decode JPEG header");
}


// public domain zlib decode    v0.2  Sean Barrett 2006-11-18
//    simple implementation
//      - all input must be provided in an upfront buffer
//      - all output is written to a single output buffer (can malloc/realloc)
//    performance
//      - fast huffman

// fast-way is faster to check than jpeg huffman, but slow way is slower
enum ZFAST_BITS = 9; // accelerate all cases in default tables
enum ZFAST_MASK = ((1 << ZFAST_BITS) - 1);

// zlib-style huffman encoding
// (jpegs packs from left, zlib from right, so can't share code)
struct zhuffman
{
    ushort[1 << ZFAST_BITS] fast; // direct lookup indexed by the next ZFAST_BITS bits of input
    ushort[16] firstcode;
    int[17] maxcode;
    ushort[16] firstsymbol;
// (remaining members of the struct opened just before this span)
    ubyte[288] size;   // code length of each table entry
    ushort[288] value; // symbol of each table entry
} ;

/// Reverses the order of the low 16 bits of `n`.
int bitreverse16(int n)
{
    n = ((n & 0xAAAA) >>  1) | ((n & 0x5555) << 1);
    n = ((n & 0xCCCC) >>  2) | ((n & 0x3333) << 2);
    n = ((n & 0xF0F0) >>  4) | ((n & 0x0F0F) << 4);
    n = ((n & 0xFF00) >>  8) | ((n & 0x00FF) << 8);
    return n;
}

/// Reverses the low `bits` bits of `v` (`bits` must not exceed 16).
int bit_reverse(int v, int bits)
{
    assert(bits <= 16);
    // to bit reverse n bits, reverse 16 and shift
    // e.g. 11 bits, bit reverse and shift away 5
    return bitreverse16(v) >> (16-bits);
}

/// Builds the decoding tables of `z` from `num` code lengths in `sizelist`.
///
/// Returns: 1 on success.
/// Throws: STBImageException when the lengths do not describe a valid code.
///         The lengths come from the compressed stream, so every consistency
///         check raises the loader's exception rather than asserting.
int zbuild_huffman(zhuffman *z, ubyte *sizelist, int num)
{
    int i,k=0;
    int code;
    int[16] next_code;
    int[17] sizes;

    // DEFLATE spec for generating codes
    memset(sizes.ptr, 0, sizes.sizeof);
    memset(z.fast.ptr, 255, z.fast.sizeof);
    for (i=0; i < num; ++i)
    {
        // DEFLATE code lengths are 0..15; anything larger would index past
        // the per-length tables below
        if (sizelist[i] > 15)
            throw new STBImageException("Bad codelength, corrupt PNG");
        ++sizes[sizelist[i]];
    }
    sizes[0] = 0;
    for (i=1; i < 16; ++i)
        if (sizes[i] > (1 << i))
            throw new STBImageException("Bad sizes, corrupt PNG");
    code = 0;
    for (i=1; i < 16; ++i) {
        next_code[i] = code;
        z.firstcode[i] = cast(ushort) code;
        z.firstsymbol[i] = cast(ushort) k;
        code = (code + sizes[i]);
        if (sizes[i])
            if (code-1 >= (1 << i))
                throw new STBImageException("Bad codelength, corrupt PNG");
        z.maxcode[i] = code << (16-i); // preshift for inner loop
        code <<= 1;
        k += sizes[i];
    }
    z.maxcode[16] = 0x10000; // sentinel
    for (i=0; i < num; ++i) {
        int s = sizelist[i];
        if (s) {
            int c = next_code[s] - z.firstcode[s] + z.firstsymbol[s];
            z.size[c] = cast(ubyte)s;
            z.value[c] = cast(ushort)i;
            if (s <= ZFAST_BITS) {
                // short codes: fill every fast-table slot whose low `s` bits
                // equal the bit-reversed code
                int k_ = bit_reverse(next_code[s],s);
                while (k_ < (1 << ZFAST_BITS)) {
                    z.fast[k_] = cast(ushort) c;
                    k_ += (1 << s);
                }
            }
            ++next_code[s];
        }
    }
    return 1;
}

// zlib-from-memory implementation for PNG reading
//    because PNG allows splitting the zlib stream arbitrarily,
//    and it's
//    annoying structurally to have PNG call ZLIB call PNG,
//    we require PNG read all the IDATs and combine them into a single
//    memory buffer

/// Inflate state: input cursor, bit accumulator, output buffer and the two
/// Huffman tables in use.
struct zbuf
{
    const(ubyte) *zbuffer;
    const(ubyte) *zbuffer_end;
    int num_bits;     // number of bits held in code_buffer
    uint code_buffer; // bit accumulator, consumed from the low end

    ubyte *zout;
    ubyte *zout_start;
    ubyte *zout_end;
    int z_expandable; // non-zero when the output buffer may be reallocated

    zhuffman z_length, z_distance;
}

/// Returns the next input byte, or 0 once the input is exhausted.
int zget8(zbuf *z)
{
    if (z.zbuffer < z.zbuffer_end)
    {
        immutable int next = *z.zbuffer;
        ++z.zbuffer;
        return next;
    }
    return 0;
}

/// Appends whole input bytes above the bits already held until more than
/// 24 bits are buffered.
void fill_bits(zbuf *z)
{
    do
    {
        assert(z.code_buffer < (1U << z.num_bits));
        immutable int incoming = zget8(z);
        z.code_buffer |= incoming << z.num_bits;
        z.num_bits += 8;
    }
    while (z.num_bits <= 24);
}

/// Removes the lowest `n` bits from the accumulator and returns them,
/// refilling first when fewer than `n` bits are buffered.
uint zreceive(zbuf *z, int n)
{
    if (z.num_bits < n)
        fill_bits(z);

    immutable uint mask = (1 << n) - 1;
    immutable uint bits = z.code_buffer & mask;
    z.code_buffer >>= n;
    z.num_bits -= n;
    return bits;
}

int zhuffman_decode(zbuf *a, zhuffman *z)
{
    int b,s,k;
    if (a.num_bits < 16) fill_bits(a);
    b = z.fast[a.code_buffer & ZFAST_MASK];
    if (b < 0xffff) {
        s = z.size[b];
        a.code_buffer >>= s;
        a.num_bits -= s;
        return z.value[b];
    }

    // not resolved by fast table, so compute it the slow way
    // use jpeg approach, which requires MSbits at top
    k = bit_reverse(a.code_buffer, 16);
    for (s=ZFAST_BITS+1; ; ++s)
        if (k < z.maxcode[s])
            break;
    if (s == 16) return -1; // invalid code!
// (End of a function that starts before this span.)
    // code size is s, so:
    b = (k >> (16-s)) - z.firstcode[s] + z.firstsymbol[s];
    // Corrupt input can produce an index outside the table, or one whose
    // recorded length disagrees with `s`; report it as an invalid code
    // (-1, like the check just above) instead of asserting.
    if (b < 0 || b >= cast(int) z.size.length) return -1;
    if (z.size[b] != s) return -1;
    a.code_buffer >>= s;
    a.num_bits -= s;
    return z.value[b];
}

/// Grows the output buffer of `z` so that `n` more bytes fit, doubling the
/// capacity until it suffices.
///
/// Returns: 1 on success.
/// Throws: STBImageException when the buffer is not expandable, when the
///         required size does not fit an `int`, or when realloc fails.
int expand(zbuf *z, int n)  // need to make room for n bytes
{
    ubyte *q;
    int cur, limit;
    if (!z.z_expandable)
        throw new STBImageException("Output buffer limit, corrupt PNG");
    cur   = cast(int) (z.zout     - z.zout_start);
    limit = cast(int) (z.zout_end - z.zout_start);
    // cur + n must be representable
    if (n < 0 || cur > int.max - n)
        throw new STBImageException("Out of memory");
    // doubling a zero capacity would never terminate
    if (limit < 1)
        limit = 1;
    while (cur + n > limit) {
        if (limit > int.max / 2)
            throw new STBImageException("Out of memory");
        limit *= 2;
    }
    q = cast(ubyte*) realloc(z.zout_start, limit);
    if (q == null)
        throw new STBImageException("Out of memory");
    z.zout_start = q;
    z.zout       = q + cur;
    z.zout_end   = q + limit;
    return 1;
}

// Base match length and number of extra bits for length symbols 257..287
// (indexed by symbol - 257).
static immutable int[31] length_base = [
    3,4,5,6,7,8,9,10,11,13,
    15,17,19,23,27,31,35,43,51,59,
    67,83,99,115,131,163,195,227,258,0,0 ];

static immutable int[31] length_extra =
    [ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 ];

// Base distance and number of extra bits for each distance symbol.
static immutable int[32] dist_base = [ 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
    257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0];

// the two trailing (unused) entries are spelled out so the initializer
// matches the declared length
static immutable int[32] dist_extra =
    [ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13,0,0];

int parse_huffman_block(zbuf *a)
{
    for(;;) {
        int z = zhuffman_decode(a, &a.z_length);
        if (z < 256) {
            if (z < 0)
                throw new STBImageException("Bad Huffman code, corrupt PNG");
            if (a.zout >= a.zout_end) if (!expand(a, 1)) return 0;
            *a.zout++ = cast(ubyte) z;
        } else {
            ubyte *p;
            int len,dist;
            if (z == 256) return 1;
            // length symbols 286 and 287 never occur in valid DEFLATE data;
            // their table entries are zero placeholders
            if (z >= 286)
                throw new STBImageException("Bad Huffman code, corrupt PNG");
            z -= 257;
            len = length_base[z];
            if (length_extra[z]) len += zreceive(a, length_extra[z]);
            z = zhuffman_decode(a, &a.z_distance);
            // likewise distance symbols 30 and 31 are invalid
            if (z < 0 || z >= 30) throw new STBImageException("Bad Huffman code, corrupt PNG");
            dist = dist_base[z];
            if
(dist_extra[z]) dist += zreceive(a, dist_extra[z]);
            if (a.zout - a.zout_start < dist) throw new STBImageException("Bad dist, corrupt PNG");
            if (a.zout + len > a.zout_end) if (!expand(a, len)) return 0;
            // taken after expand(), which may have moved the buffer
            p = a.zout - dist;
            while (len--)
                *a.zout++ = *p++;
        }
    }
}

/// Reads the header of a dynamic-Huffman block and builds `a.z_length` and
/// `a.z_distance` from the code lengths it describes.
///
/// Returns: 1 on success, 0 when zbuild_huffman reports failure.
/// Throws: STBImageException when the code-length data is inconsistent:
///         an invalid symbol, a "repeat previous" code with no previous
///         length, or a run that would exceed the announced total.
int compute_huffman_codes(zbuf *a)
{
    static immutable ubyte[19] length_dezigzag = [ 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 ];
    zhuffman z_codelength;
    ubyte[286+32+137] lencodes;//padding for maximum single op
    ubyte[19] codelength_sizes;
    int i,n;

    int hlit  = zreceive(a,5) + 257;
    int hdist = zreceive(a,5) + 1;
    int hclen = zreceive(a,4) + 4;
    immutable int ntot = hlit + hdist; // total number of code lengths to read

    memset(codelength_sizes.ptr, 0, codelength_sizes.sizeof);
    for (i=0; i < hclen; ++i) {
        int s = zreceive(a,3);
        codelength_sizes[length_dezigzag[i]] = cast(ubyte) s;
    }
    if (!zbuild_huffman(&z_codelength, codelength_sizes.ptr, 19)) return 0;

    n = 0;
    while (n < ntot) {
        int c = zhuffman_decode(a, &z_codelength);
        // zhuffman_decode returns -1 for an invalid code
        if (c < 0 || c >= 19)
            throw new STBImageException("Bad codelengths, corrupt PNG");
        if (c < 16) {
            lencodes[n++] = cast(ubyte) c;
        } else {
            ubyte fill = 0;
            if (c == 16) {
                // repeat the previous length 3..6 times
                c = zreceive(a,2)+3;
                if (n == 0)
                    throw new STBImageException("Bad codelengths, corrupt PNG");
                fill = lencodes[n-1];
            } else if (c == 17) {
                // 3..10 zero lengths
                c = zreceive(a,3)+3;
            } else {
                // c == 18: 11..138 zero lengths
                c = zreceive(a,7)+11;
            }
            // a run may not extend past the announced number of lengths;
            // this also keeps the write inside lencodes
            if (ntot - n < c)
                throw new STBImageException("Bad codelengths, corrupt PNG");
            memset(lencodes.ptr+n, fill, c);
            n += c;
        }
    }
    if (n != ntot) throw new STBImageException("Bad codelengths, corrupt PNG");
    if (!zbuild_huffman(&a.z_length, lencodes.ptr, hlit)) return 0;
    if (!zbuild_huffman(&a.z_distance, lencodes.ptr+hlit, hdist)) return 0;
    return 1;
}

int parse_uncompressed_block(zbuf *a)
{
    ubyte header[4];
    int len,nlen,k;
    if (a.num_bits & 7)
        zreceive(a, a.num_bits & 7); // discard
    // drain the bit-packed data into header
    k = 0;
    while (a.num_bits
> 0) {
        header[k++] = cast(ubyte) (a.code_buffer & 255); // wtf this warns?
        a.code_buffer >>= 8;
        a.num_bits -= 8;
    }
    assert(a.num_bits == 0);
    // now fill header the normal way
    while (k < 4)
        header[k++] = cast(ubyte) zget8(a);
    len  = header[1] * 256 + header[0];
    nlen = header[3] * 256 + header[2];
    if (nlen != (len ^ 0xffff)) throw new STBImageException("Zlib corrupt, corrupt PNG");
    if (a.zbuffer + len > a.zbuffer_end) throw new STBImageException("Read past buffer, corrupt PNG");
    if (a.zout + len > a.zout_end)
        if (!expand(a, len)) return 0;
    memcpy(a.zout, a.zbuffer, len);
    a.zbuffer += len;
    a.zout += len;
    return 1;
}

/// Reads and validates the two-byte zlib header.
///
/// Returns: 1 when the header is acceptable.
/// Throws: STBImageException on a failed header check, a preset dictionary
///         flag, or a compression method other than 8.
int parse_zlib_header(zbuf *a)
{
    immutable int cmf = zget8(a);
    immutable int flg = zget8(a);
    immutable int method = cmf & 15;
    // the high nibble of cmf (window size) is ignored: output is fully buffered

    // zlib spec: the two header bytes, read as a 16-bit value, are a multiple of 31
    immutable bool headerCheckOk = ((cmf*256 + flg) % 31) == 0;
    if (!headerCheckOk)
        throw new STBImageException("Bad zlib header, corrupt PNG");

    // preset dictionary not allowed in png
    if ((flg & 32) != 0)
        throw new STBImageException("No preset dict, corrupt PNG");

    // DEFLATE required for png
    if (method != 8)
        throw new STBImageException("Bad compression, corrupt PNG");

    return 1;
}

// @TODO: should statically initialize these for optimal thread safety
__gshared ubyte[288] default_length;
__gshared ubyte[32] default_distance;

/// Fills the two tables above with the code lengths of the fixed Huffman
/// codes. The distance table is written last.
void init_defaults()
{
    // the spec states these ranges inclusively: 0-143, 144-255, 256-279, 280-287
    default_length[  0 .. 144] = 8;
    default_length[144 .. 256] = 9;
    default_length[256 .. 280] = 7;
    default_length[280 .. 288] = 8;

    default_distance[] = 5;
}

__gshared int stbi_png_partial; // a quick hack to only allow decoding some of a PNG...
// I should implement real streaming support instead

/// Inflates the stream held by `a`, block by block, until the final block has
/// been processed (or, when `stbi_png_partial` is set, until more than 65536
/// output bytes exist).
///
/// Returns: 1 on success, 0 on failure (block type 3, or a helper returning 0).
/// Throws: whatever the block parsers throw.
int parse_zlib(zbuf *a, int parse_header)
{
    int final_, type;
    if (parse_header)
        if (!parse_zlib_header(a)) return 0;
    a.num_bits = 0;
    a.code_buffer = 0;
    do {
        final_ = zreceive(a,1);
        type = zreceive(a,2);
        if (type == 0) {
            if (!parse_uncompressed_block(a)) return 0;
        } else if (type == 3) {
            return 0;
        } else {
            if (type == 1) {
                // use fixed code lengths
                if (!default_distance[31]) init_defaults();
                if (!zbuild_huffman(&a.z_length  , default_length.ptr  , 288)) return 0;
                if (!zbuild_huffman(&a.z_distance, default_distance.ptr,  32)) return 0;
            } else {
                if (!compute_huffman_codes(a)) return 0;
            }
            if (!parse_huffman_block(a)) return 0;
        }
        if (stbi_png_partial && a.zout - a.zout_start > 65536)
            break;
    } while (!final_);
    return 1;
}

/// Points `a` at the output buffer `obuf` of `olen` bytes (growable when
/// `exp` is non-zero) and runs parse_zlib.
int do_zlib(zbuf *a, ubyte *obuf, int olen, int exp, int parse_header)
{
    a.zout_start = obuf;
    a.zout       = obuf;
    a.zout_end   = obuf + olen;
    a.z_expandable = exp;

    return parse_zlib(a, parse_header);
}

/// Inflates a zlib stream (with header) into a malloc'ed buffer that starts
/// at `initial_size` bytes. See stbi_zlib_decode_malloc_guesssize_headerflag.
ubyte *stbi_zlib_decode_malloc_guesssize(const(ubyte) *buffer, int len, int initial_size, int *outlen)
{
    return stbi_zlib_decode_malloc_guesssize_headerflag(buffer, len, initial_size, outlen, 1);
}

/// Inflates a zlib stream (with header) into a malloc'ed buffer, starting
/// from a 16384-byte guess.
ubyte *stbi_zlib_decode_malloc(const(ubyte) *buffer, int len, int *outlen)
{
    return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
}

/// Inflates `len` bytes at `buffer` into a growable malloc'ed buffer.
///
/// Params:
///   initial_size = first allocation size for the output
///   outlen       = when non-null, receives the number of bytes produced
///   parse_header = non-zero when the data starts with a zlib header
/// Returns: the output buffer (the caller frees it), or null when the
///          initial allocation fails or the decoder reports failure.
/// Throws: STBImageException from the decoder; the output buffer is released
///         before the exception leaves this function.
ubyte *stbi_zlib_decode_malloc_guesssize_headerflag(const(ubyte) *buffer, int len, int initial_size, int *outlen, int parse_header)
{
    zbuf a;
    ubyte *p = cast(ubyte*) malloc(initial_size);
    if (p == null) return null;
    a.zbuffer = buffer;
    a.zbuffer_end = buffer + len;
    // The decoder signals most errors by throwing, which would bypass the
    // free() in the else-branch below. zout_start always designates the live
    // allocation (expand() updates it after a successful realloc).
    a.zout_start = p;
    scope(failure) free(a.zout_start);
    if (do_zlib(&a, p, initial_size, 1, parse_header)) {
        if (outlen) *outlen = cast(int) (a.zout - a.zout_start);
        return a.zout_start;
    } else {
        free(a.zout_start);
        return null;
    }
}

/// Inflates a zlib stream (with header) into the fixed buffer `obuffer`.
/// Returns: the number of bytes written, or -1 on failure.
int stbi_zlib_decode_buffer(ubyte* obuffer, int olen, const(ubyte)* ibuffer, int ilen)
{
    zbuf a;
    a.zbuffer = ibuffer;
    a.zbuffer_end = ibuffer + ilen;
    if (do_zlib(&a, obuffer, olen, 0, 1))
        return cast(int) (a.zout - a.zout_start);
    else
        return -1;
}

/// Inflates raw DEFLATE data (no zlib header) into a malloc'ed buffer,
/// starting from a 16384-byte guess.
ubyte *stbi_zlib_decode_noheader_malloc(const(ubyte) *buffer, int len, int *outlen)
{
    return stbi_zlib_decode_malloc_guesssize_headerflag(buffer, len, 16384, outlen, 0);
}

/// Inflates raw DEFLATE data (no zlib header) into the fixed buffer `obuffer`.
/// Returns: the number of bytes written, or -1 on failure.
int stbi_zlib_decode_noheader_buffer(ubyte *obuffer, int olen, const(ubyte) *ibuffer, int ilen)
{
    zbuf a;
    a.zbuffer = ibuffer;
    a.zbuffer_end = ibuffer + ilen;
    if (do_zlib(&a, obuffer, olen, 0, 0))
        return cast(int) (a.zout - a.zout_start);
    else
        return -1;
}

// public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
//    simple implementation
//      - only 8-bit samples
//      - no CRC checking
//      - allocates lots of intermediate memory
//        - avoids problem of streaming data between subsystems
//        - avoids explicit window management
//    performance
//      - uses stb_zlib, a PD zlib implementation with fast huffman decoding


/// Header of a PNG chunk: payload length followed by the four-character type.
struct chunk
{
    uint length;
    uint type;
}

uint PNG_TYPE(ubyte a, ubyte b, ubyte c, ubyte d)
{
    return (a << 24) + (b
<< 16) + (c << 8) + d;
}

/// Reads a chunk header from `s`: the length first, then the type.
chunk get_chunk_header(stbi *s)
{
    chunk header;
    header.length = get32(s);
    header.type   = get32(s);
    return header;
}

/// Consumes eight bytes from `s` and compares them with the PNG signature.
///
/// Returns: 1 when all eight bytes match.
/// Throws: STBImageException at the first mismatching byte.
static int check_png_header(stbi *s)
{
    static immutable ubyte[8] png_sig = [ 137, 80, 78, 71, 13, 10, 26, 10 ];
    foreach (expected; png_sig)
    {
        if (get8u(s) != expected)
            throw new STBImageException("Bad PNG sig, not a PNG");
    }
    return 1;
}

/// PNG decoding context: the input stream plus the intermediate buffers.
struct png
{
    stbi *s;
    ubyte *idata;    // concatenated IDAT payloads
    ubyte *expanded; // inflated IDAT data
    ubyte *out_;     // decoded pixels
}


// Row filter identifiers. The two *_first values are substitutes used on the
// first row, which has no previous row to sample.
enum : int
{
    F_none=0, F_sub=1, F_up=2, F_avg=3, F_paeth=4,
    F_avg_first, F_paeth_first
}

// filter to apply on the first row, indexed by the filter read from the data
static immutable ubyte[5] first_row_filter =
[
    F_none, F_sub, F_none, F_avg_first, F_paeth_first
];

/// Paeth predictor: returns whichever of `a`, `b`, `c` is closest to
/// a + b - c, preferring `a`, then `b`, on ties.
static int paeth(int a, int b, int c)
{
    immutable int estimate = a + b - c;
    immutable int distA = abs(estimate - a);
    immutable int distB = abs(estimate - b);
    immutable int distC = abs(estimate - c);

    if (distA > distB || distA > distC)
        return (distB <= distC) ? b : c;
    return a;
}

// create the png data from post-deflated data
static int create_png_image_raw(png *a, ubyte *raw, uint raw_len, int out_n, uint x, uint y)
{
    stbi *s = a.s;
    uint i,j,stride = x*out_n;
    int k;
    int img_n = s.img_n; // copy it into a local for later
    assert(out_n == s.img_n || out_n == s.img_n+1);
    if (stbi_png_partial) y = 1;
    a.out_ = cast(ubyte*) malloc(x * y * out_n);
    if (!a.out_) throw new STBImageException("Out of memory");
    if (!stbi_png_partial) {
        if (s.img_x == x && s.img_y == y) {
            if (raw_len != (img_n * x + 1) * y) throw new STBImageException("Not enough pixels, corrupt PNG");
        } else { // interlaced:
            if (raw_len < (img_n * x + 1) * y) throw new STBImageException("Not enough pixels, corrupt PNG");
        }
    }
    for (j=0; j < y; ++j) {
        ubyte *cur = a.out_ +
stride*j;
        // row above the current one; only dereferenced by filters that are
        // never selected for row 0 (see first_row_filter)
        ubyte *prior = cur - stride;
        // every row of raw data starts with its filter byte
        int filter = *raw++;
        if (filter > 4) throw new STBImageException("Invalid filter, corrupt PNG");
        // if first row, use special filter that doesn't sample previous row
        if (j == 0) filter = first_row_filter[filter];
        // handle first pixel explicitly
        // (there is no pixel to the left, so "left" contributions are dropped)
        for (k=0; k < img_n; ++k) {
            switch (filter) {
                case F_none       : cur[k] = raw[k]; break;
                case F_sub        : cur[k] = raw[k]; break;
                case F_up         : cur[k] = cast(ubyte)(raw[k] + prior[k]); break;
                case F_avg        : cur[k] = cast(ubyte)(raw[k] + (prior[k]>>1)); break;
                case F_paeth      : cur[k] = cast(ubyte) (raw[k] + paeth(0,prior[k],0)); break;
                case F_avg_first  : cur[k] = raw[k]; break;
                case F_paeth_first: cur[k] = raw[k]; break;
                default: break;
            }
        }
        // when an extra output channel was requested, fill it with 255
        if (img_n != out_n) cur[img_n] = 255;
        raw += img_n;
        cur += out_n;
        prior += out_n;
        // this is a little gross, so that we don't switch per-pixel or per-component
        if (img_n == out_n) {

            // remaining x-1 pixels, input and output pixel sizes equal
            for (i=x-1; i >= 1; --i, raw+=img_n,cur+=img_n,prior+=img_n)
                for (k=0; k < img_n; ++k)
                {
                    switch (filter) {
                        case F_none:  cur[k] = raw[k]; break;
                        case F_sub:   cur[k] = cast(ubyte)(raw[k] + cur[k-img_n]); break;
                        case F_up:    cur[k] = cast(ubyte)(raw[k] + prior[k]); break;
                        case F_avg:   cur[k] = cast(ubyte)(raw[k] + ((prior[k] + cur[k-img_n])>>1)); break;
                        case F_paeth:  cur[k] = cast(ubyte) (raw[k] + paeth(cur[k-img_n],prior[k],prior[k-img_n])); break;
                        case F_avg_first:    cur[k] = cast(ubyte)(raw[k] + (cur[k-img_n] >> 1)); break;
                        case F_paeth_first:  cur[k] = cast(ubyte) (raw[k] + paeth(cur[k-img_n],0,0)); break;
                        default: break;
                    }
                }
        } else {
            assert(img_n+1 == out_n);

            // remaining x-1 pixels, output one channel wider than input: the
            // extra channel is set to 255 and neighbours are out_n bytes away
            for (i=x-1; i >= 1; --i, cur[img_n]=255,raw+=img_n,cur+=out_n,prior+=out_n)
                for (k=0; k < img_n; ++k)
                {
                    switch (filter) {
                        case F_none:  cur[k] = raw[k]; break;
                        case F_sub:   cur[k] = cast(ubyte)(raw[k] +
cur[k-out_n]); break;
                        case F_up:    cur[k] = cast(ubyte)(raw[k] + prior[k]); break;
                        case F_avg:   cur[k] = cast(ubyte)(raw[k] + ((prior[k] + cur[k-out_n])>>1)); break;
                        case F_paeth:  cur[k] = cast(ubyte) (raw[k] + paeth(cur[k-out_n],prior[k],prior[k-out_n])); break;
                        case F_avg_first:    cur[k] = cast(ubyte)(raw[k] + (cur[k-out_n] >> 1)); break;
                        case F_paeth_first:  cur[k] = cast(ubyte) (raw[k] + paeth(cur[k-out_n],0,0)); break;
                        default: break;
                    }
                }
        }
    }
    return 1;
}

/// Turns inflated PNG data into pixels in `a.out_`.
///
/// Non-interlaced images are handed straight to create_png_image_raw.
/// Interlaced images are decoded one pass at a time and each pass is
/// scattered into a full-size buffer, which then becomes `a.out_`.
///
/// Params:
///   raw        = inflated, still filtered image data
///   raw_len    = number of bytes at `raw`
///   out_n      = bytes per output pixel
///   interlaced = non-zero for an interlaced image
/// Returns: 1 on success, 0 when a pass reports failure.
/// Throws: STBImageException on allocation failure or from
///         create_png_image_raw; the full-size buffer is released and
///         `stbi_png_partial` restored before the exception propagates.
int create_png_image(png *a, ubyte *raw, uint raw_len, int out_n, int interlaced)
{
    // Adam7 pass geometry: first column/row of each pass and the spacing
    // between its samples
    static immutable int[7] xorig = [ 0,4,0,2,0,1,0 ];
    static immutable int[7] yorig = [ 0,0,4,0,2,0,1 ];
    static immutable int[7] xspc  = [ 8,8,4,4,2,2,1 ];
    static immutable int[7] yspc  = [ 8,8,8,4,4,2,2 ];

    ubyte *final_;
    int p;
    int save;
    if (!interlaced)
        return create_png_image_raw(a, raw, raw_len, out_n, a.s.img_x, a.s.img_y);

    // partial decoding is switched off while the passes are decoded
    save = stbi_png_partial;
    stbi_png_partial = 0;
    scope(exit) stbi_png_partial = save;

    // de-interlacing
    final_ = cast(ubyte*) malloc(a.s.img_x * a.s.img_y * out_n);
    if (final_ == null)
        throw new STBImageException("Out of memory");
    scope(failure) free(final_);

    for (p=0; p < 7; ++p) {
        int i,j,x,y;
        // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
        x = (a.s.img_x - xorig[p] + xspc[p]-1) / xspc[p];
        y = (a.s.img_y - yorig[p] + yspc[p]-1) / yspc[p];
        if (x && y) {
            if (!create_png_image_raw(a, raw, raw_len, out_n, x, y)) {
                free(final_);
                return 0;
            }
            for (j=0; j < y; ++j)
                for (i=0; i < x; ++i)
                    memcpy(final_ + (j*yspc[p]+yorig[p])*a.s.img_x*out_n + (i*xspc[p]+xorig[p])*out_n,
                           a.out_ + (j*x+i)*out_n, out_n);
            free(a.out_);
            a.out_ = null;
            // The filtered stream stores img_n bytes per pixel plus one filter
            // byte per row (that is what create_png_image_raw consumes), no
            // matter how many bytes each output pixel takes.
            immutable int pass_len = (a.s.img_n * x + 1) * y;
            raw += pass_len;
            raw_len -= pass_len;
        }
    }
    a.out_ = final_;

    return 1;
}

static int compute_transparency(png *z, ubyte tc[3], int out_n)
{
    stbi *s = z.s;
    uint i, pixel_count = s.img_x * s.img_y;
    ubyte *p = z.out_;

    // compute color-based transparency, assuming we've
    // already got 255 as the alpha value in the output
    assert(out_n == 2 || out_n == 4);

    if (out_n == 2) {
        // grey + alpha: a pixel equal to the transparent grey gets alpha 0
        for (i=0; i < pixel_count; ++i) {
            p[1] = (p[0] == tc[0] ? 0 : 255);
            p += 2;
        }
    } else {
        // RGBA: only pixels matching all three key components are cleared
        for (i=0; i < pixel_count; ++i) {
            if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
                p[3] = 0;
            p += 4;
        }
    }
    return 1;
}

/// Replaces the one-byte-per-pixel index image in `a.out_` by a new buffer
/// holding `pal_img_n` bytes per pixel looked up in `palette` (four bytes per
/// entry). The old buffer is freed. `len` is not used.
///
/// Returns: 1.
/// Throws: STBImageException when the new buffer cannot be allocated.
int expand_palette(png *a, ubyte *palette, int len, int pal_img_n)
{
    uint i, pixel_count = a.s.img_x * a.s.img_y;
    ubyte *p;
    ubyte *temp_out;
    ubyte *orig = a.out_;

    p = cast(ubyte*) malloc(pixel_count * pal_img_n);
    if (p == null)
        throw new STBImageException("Out of memory");

    // between here and free(out) below, exitting would leak
    temp_out = p;

    if (pal_img_n == 3) {
        for (i=0; i < pixel_count; ++i) {
            int n = orig[i]*4;
            p[0] = palette[n  ];
            p[1] = palette[n+1];
            p[2] = palette[n+2];
            p += 3;
        }
    } else {
        for (i=0; i < pixel_count; ++i) {
            int n = orig[i]*4;
            p[0] = palette[n  ];
            p[1] = palette[n+1];
            p[2] = palette[n+2];
            p[3] = palette[n+3];
            p += 4;
        }
    }
    free(a.out_);
    a.out_ = temp_out;

    return 1;
}

// Walks the chunks of a PNG stream. `scan` selects how far to go: SCAN_type
// stops after the signature check; SCAN_header returns as soon as the header
// information is known.
int parse_png_file(png *z, int scan, int req_comp)
{
    ubyte[1024] palette;        // four bytes per palette entry
    ubyte pal_img_n=0;          // 0 for non-paletted images, otherwise 3 or 4
    ubyte has_trans=0;
    ubyte tc[3];                // transparent colour taken from tRNS
    uint ioff=0, idata_limit=0, i, pal_len=0;
    int first=1,k,interlace=0;
    stbi *s = z.s;

    z.expanded = null;
    z.idata = null;
    z.out_ = null;

    if (!check_png_header(s)) return 0;

    if (scan == SCAN_type) return 1;

    for (;;) {
        chunk c = get_chunk_header(s);
        switch (c.type) {
            case PNG_TYPE('I','H','D','R'): {
                int depth,color,comp,filter;
                if (!first) throw new STBImageException("Multiple IHDR, corrupt PNG");
                first = 0;
                if (c.length != 13) throw new STBImageException("Bad IHDR len, corrupt PNG");
                s.img_x = get32(s); if (s.img_x > (1 << 24)) throw new STBImageException("Very large image (corrupt?)");
                s.img_y = get32(s); if (s.img_y > (1 << 24)) throw new STBImageException("Very large image (corrupt?)");
                depth = get8(s);  if (depth != 8)        throw new STBImageException("8bit only, PNG not supported: 8-bit only");
                color = get8(s);  if (color > 6)         throw new STBImageException("Bad ctype, corrupt PNG");
                // colour type 3 is paletted; any other odd colour type is rejected
                if (color == 3) pal_img_n = 3; else if (color & 1) throw new STBImageException("Bad ctype, corrupt PNG");
                comp  = get8(s);  if (comp) throw new STBImageException("Bad comp method, corrupt PNG");
                filter= get8(s);  if (filter) throw new STBImageException("Bad filter method, corrupt PNG");
                interlace = get8(s); if (interlace>1) throw new STBImageException("Bad interlace method, corrupt PNG");
                if (!s.img_x || !s.img_y) throw new STBImageException("0-pixel image, corrupt PNG");
                if (!pal_img_n) {
                    // bit 2 of the colour type selects colour, bit 4 an alpha channel
                    s.img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
                    if ((1 << 30) / s.img_x / s.img_n < s.img_y) throw new STBImageException("Image too large to decode");
                    if (scan == SCAN_header) return 1;
                } else {
                    // if paletted, then pal_n is our final components, and
                    // img_n is # components to decompress/filter.
// (Continuation of a function that starts before this span.)
                    s.img_n = 1;
                    if ((1 << 30) / s.img_x / 4 < s.img_y) throw new STBImageException("Too large, corrupt PNG");
                    // if SCAN_header, have to scan to see if we have a tRNS
                }
                break;
            }

            case PNG_TYPE('P','L','T','E'):  {
                if (first) throw new STBImageException("first not IHDR, corrupt PNG");
                if (c.length > 256*3) throw new STBImageException("invalid PLTE, corrupt PNG");
                pal_len = c.length / 3;
                if (pal_len * 3 != c.length) throw new STBImageException("invalid PLTE, corrupt PNG");
                // stored as four bytes per entry, alpha preset to opaque
                for (i=0; i < pal_len; ++i) {
                    palette[i*4+0] = get8u(s);
                    palette[i*4+1] = get8u(s);
                    palette[i*4+2] = get8u(s);
                    palette[i*4+3] = 255;
                }
                break;
            }

            case PNG_TYPE('t','R','N','S'): {
                if (first) throw new STBImageException("first not IHDR, corrupt PNG");
                if (z.idata) throw new STBImageException("tRNS after IDAT, corrupt PNG");
                if (pal_img_n) {
                    if (scan == SCAN_header) { s.img_n = 4; return 1; }
                    if (pal_len == 0) throw new STBImageException("tRNS before PLTE, corrupt PNG");
                    if (c.length > pal_len) throw new STBImageException("bad tRNS len, corrupt PNG");
                    pal_img_n = 4;
                    // one alpha byte per palette entry
                    for (i=0; i < c.length; ++i)
                        palette[i*4+3] = get8u(s);
                } else {
                    if (!(s.img_n & 1)) throw new STBImageException("tRNS with alpha, corrupt PNG");
                    if (c.length != cast(uint) s.img_n*2) throw new STBImageException("bad tRNS len, corrupt PNG");
                    has_trans = 1;
                    for (k=0; k < s.img_n; ++k)
                        tc[k] = cast(ubyte) get16(s); // non 8-bit images will be larger
                }
                break;
            }

            case PNG_TYPE('I','D','A','T'): {
                if (first) throw new STBImageException("first not IHDR, corrupt PNG");
                if (pal_img_n && !pal_len) throw new STBImageException("no PLTE, corrupt PNG");
                if (scan == SCAN_header) { s.img_n = pal_img_n; return 1; }
                // The accumulated length is later passed to the inflater as an
                // int, and the size computations below use 32-bit unsigned
                // arithmetic: reject sizes that would wrap.
                if (c.length > (1u << 30) || ioff + c.length > cast(uint) int.max)
                    throw new STBImageException("IDAT too large, corrupt PNG");
                if (ioff + c.length > idata_limit) {
                    ubyte *p;
                    if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
                    while (ioff + c.length > idata_limit)
                        idata_limit *= 2;
                    p = cast(ubyte*) realloc(z.idata, idata_limit); if (p == null) throw new STBImageException("Out of memory");
                    z.idata = p;
                }
                if (!getn(s, z.idata+ioff,c.length)) throw new STBImageException("outofdata, corrupt PNG");
                ioff += c.length;
                break;
            }

            case PNG_TYPE('I','E','N','D'): {
                uint raw_len;
                if (first) throw new STBImageException("first not IHDR, corrupt PNG");
                if (scan != SCAN_load) return 1;
                if (z.idata == null) throw new STBImageException("no IDAT, corrupt PNG");
                z.expanded = stbi_zlib_decode_malloc_guesssize_headerflag(z.idata, ioff, 16384, cast(int *) &raw_len, 1);
                if (z.expanded == null) return 0; // zlib should set error
                free(z.idata); z.idata = null;
                // decode with one extra channel when the caller wants exactly
                // img_n+1 components (but not 3) of a non-paletted image, or
                // when a tRNS colour key has to be turned into alpha
                if ((req_comp == s.img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
                    s.img_out_n = s.img_n+1;
                else
                    s.img_out_n = s.img_n;
                if (!create_png_image(z, z.expanded, raw_len, s.img_out_n, interlace)) return 0;
                if (has_trans)
                    if (!compute_transparency(z, tc, s.img_out_n)) return 0;
                if (pal_img_n) {
                    // pal_img_n == 3 or 4
                    s.img_n = pal_img_n; // record the actual colors we had
                    s.img_out_n = pal_img_n;
                    if (req_comp >= 3) s.img_out_n = req_comp;
                    if (!expand_palette(z, palette.ptr, pal_len, s.img_out_n))
                        return 0;
                }
                free(z.expanded); z.expanded = null;
                return 1;
            }

            default:
                // if critical, fail
                if (first) throw new STBImageException("first not IHDR, corrupt PNG");
                // bit 29 of the type is clear for critical chunks
                if ((c.type & (1 << 29)) == 0) {

                    throw new STBImageException("PNG not supported: unknown chunk type");
                }
                skip(s, c.length);
                break;
        }
        // end of chunk, read and skip CRC
        get32(s);
    }
}

ubyte *do_png(png *p, int *x, int *y, int *n, int req_comp)
{
    ubyte *result=null;
    if (req_comp < 0 || req_comp > 4)
throw new STBImageException("Internal error: bad req_comp"); 2210 if (parse_png_file(p, SCAN_load, req_comp)) { 2211 result = p.out_; 2212 p.out_ = null; 2213 if (req_comp && req_comp != p.s.img_out_n) { 2214 result = convert_format(result, p.s.img_out_n, req_comp, p.s.img_x, p.s.img_y); 2215 p.s.img_out_n = req_comp; 2216 if (result == null) return result; 2217 } 2218 *x = p.s.img_x; 2219 *y = p.s.img_y; 2220 if (n) *n = p.s.img_n; 2221 } 2222 free(p.out_); p.out_ = null; 2223 free(p.expanded); p.expanded = null; 2224 free(p.idata); p.idata = null; 2225 2226 return result; 2227 } 2228 2229 ubyte *stbi_png_load(stbi *s, int *x, int *y, int *comp, int req_comp) 2230 { 2231 png p; 2232 p.s = s; 2233 return do_png(&p, x,y,comp,req_comp); 2234 } 2235 2236 void stbi_png_test(stbi *s) 2237 { 2238 int r = check_png_header(s); 2239 if (r == 0) 2240 throw new STBImageException("Couldn't decode PNG header"); 2241 } 2242 2243 // Microsoft/Windows BMP image 2244 2245 void stbi_bmp_test(stbi *s) 2246 { 2247 if (get8(s) != 'B') throw new STBImageException("Couldn't decode BMP header"); 2248 if (get8(s) != 'M') throw new STBImageException("Couldn't decode BMP header"); 2249 get32le(s); // discard filesize 2250 get16le(s); // discard reserved 2251 get16le(s); // discard reserved 2252 get32le(s); // discard data offset 2253 int sz = get32le(s); 2254 if (sz == 12 || sz == 40 || sz == 56 || sz == 108) 2255 return; 2256 2257 throw new STBImageException("Couldn't decode BMP header"); 2258 } 2259 2260 2261 // returns 0..31 for the highest set bit 2262 int high_bit(uint z) 2263 { 2264 int n=0; 2265 if (z == 0) return -1; 2266 if (z >= 0x10000) n += 16, z >>= 16; 2267 if (z >= 0x00100) n += 8, z >>= 8; 2268 if (z >= 0x00010) n += 4, z >>= 4; 2269 if (z >= 0x00004) n += 2, z >>= 2; 2270 if (z >= 0x00002) n += 1, z >>= 1; 2271 return n; 2272 } 2273 2274 int bitcount(uint a) 2275 { 2276 a = (a & 0x55555555) + ((a >> 1) & 0x55555555); // max 2 2277 a = (a & 0x33333333) + ((a >> 2) & 
0x33333333); // max 4
   a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits
   a = (a + (a >> 8)); // max 16 per 8 bits
   a = (a + (a >> 16)); // max 32 per 8 bits
   return a & 0xff;
}

/// Moves a masked channel value into the low 8 bits and widens it.
///
/// `v` is shifted right by `shift` (left when `shift` is negative); the
/// result is then added to copies of itself shifted right by `bits`,
/// 2*`bits`, ... while the shift stays below 8, so that a channel narrower
/// than 8 bits fills the whole byte.
/// `bits` must be greater than 0, otherwise the loop never terminates.
int shiftsigned(int v, int shift, int bits)
{
   int result;
   int z=0;

   if (shift < 0) v <<= -shift;
   else v >>= shift;
   result = v;

   z = bits;
   while (z < 8) {
      result += v >> z;
      z += bits;
   }
   return result;
}

/// Decodes an uncompressed BMP (4, 8, 16, 24 or 32 bits per pixel) from `s`.
///
/// Params:
///   s        = input context, positioned at the start of the file.
///   x, y     = receive the image width and height.
///   comp     = if non-null, receives 4 when an alpha mask is present, else 3.
///   req_comp = requested number of output components (0 keeps the native count).
/// Returns: a malloc'ed buffer of pixels, or null if convert_format failed.
/// Throws: STBImageException on unsupported or corrupt input and when
///         memory cannot be allocated.
ubyte *bmp_load(stbi *s, int *x, int *y, int *comp, int req_comp)
{
   ubyte *out_;
   uint mr=0,mg=0,mb=0,ma=0, fake_a=0;
   ubyte pal[256][4];
   int psize=0,i,j,compress=0,width;
   int bpp, flip_vertically, pad, target, offset, hsz;
   if (get8(s) != 'B' || get8(s) != 'M') throw new STBImageException("not BMP, Corrupt BMP");
   get32le(s); // discard filesize
   get16le(s); // discard reserved
   get16le(s); // discard reserved
   offset = get32le(s);
   hsz = get32le(s);
   if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108) throw new STBImageException("unknown BMP, BMP type not supported: unknown");
   if (hsz == 12) {
      s.img_x = get16le(s);
      s.img_y = get16le(s);
   } else {
      s.img_x = get32le(s);
      s.img_y = get32le(s);
   }
   if (get16le(s) != 1) throw new STBImageException("bad BMP");
   bpp = get16le(s);
   if (bpp == 1) throw new STBImageException("monochrome, BMP type not supported: 1-bit");
   // a positive height means the rows are stored bottom-up
   flip_vertically = (cast(int) s.img_y) > 0;
   s.img_y = abs(cast(int) s.img_y);
   if (hsz == 12) {
      if (bpp < 24)
         psize = (offset - 14 - 24) / 3;
   } else {
      compress = get32le(s);
      if (compress == 1 || compress == 2) throw new STBImageException("BMP RLE, BMP type not supported: RLE");
      get32le(s); // discard sizeof
      get32le(s); // discard hres
      get32le(s); // discard vres
      get32le(s); // discard colorsused
      get32le(s); // discard max important
      if (hsz == 40 || hsz == 56) {
         if (hsz == 56) {
            get32le(s);
            get32le(s);
            get32le(s);
            get32le(s);
         }
         if (bpp == 16 || bpp == 32) {
            mr = mg = mb = 0;
            if (compress == 0) {
               if (bpp == 32) {
                  mr = 0xffu << 16;
                  mg = 0xffu <<  8;
                  mb = 0xffu <<  0;
                  ma = 0xffu << 24;
                  fake_a = 1; // @TODO: check for cases like alpha value is all 0 and switch it to 255
               } else {
                  mr = 31u << 10;
                  mg = 31u <<  5;
                  mb = 31u <<  0;
               }
            } else if (compress == 3) {
               mr = get32le(s);
               mg = get32le(s);
               mb = get32le(s);
               // not documented, but generated by photoshop and handled by mspaint
               if (mr == mg && mg == mb) {
                  // ?!?!?
                  throw new STBImageException("bad BMP");
               }
            } else
               throw new STBImageException("bad BMP");
         }
      } else {
         assert(hsz == 108);
         mr = get32le(s);
         mg = get32le(s);
         mb = get32le(s);
         ma = get32le(s);
         get32le(s); // discard color space
         for (i=0; i < 12; ++i)
            get32le(s); // discard color space parameters
      }
      if (bpp < 16)
         psize = (offset - 14 - hsz) >> 2;
   }
   s.img_n = ma ? 4 : 3;
   if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
      target = req_comp;
   else
      target = s.img_n; // if they want monochrome, we'll post-convert

   // The dimensions come straight from the file: reject sizes whose byte
   // count would wrap the 32-bit multiplication below (same bound as PNG).
   if (s.img_x != 0 && (1 << 30) / s.img_x / 4 < s.img_y)
      throw new STBImageException("Too large, corrupt BMP");

   out_ = cast(ubyte*) malloc(target * s.img_x * s.img_y);
   if (!out_) throw new STBImageException("Out of memory");
   if (bpp < 16) {
      // palettized image: read the colour table, then expand the indices
      int z=0;
      if (psize == 0 || psize > 256) { free(out_); throw new STBImageException("invalid, Corrupt BMP"); }
      for (i=0; i < psize; ++i) {
         pal[i][2] = get8u(s);
         pal[i][1] = get8u(s);
         pal[i][0] = get8u(s);
         if (hsz != 12) get8(s);
         pal[i][3] = 255;
      }
      skip(s, offset - 14 - hsz - psize * (hsz == 12 ? 3 : 4));
      if (bpp == 4) width = (s.img_x + 1) >> 1;
      else if (bpp == 8) width = s.img_x;
      else { free(out_); throw new STBImageException("bad bpp, corrupt BMP"); }
      pad = (-width)&3; // rows are padded to a multiple of 4 bytes
      for (j=0; j < cast(int) s.img_y; ++j) {
         for (i=0; i < cast(int) s.img_x; i += 2) {
            int v=get8(s),v2=0;
            if (bpp == 4) {
               v2 = v & 15;
               v >>= 4;
            }
            out_[z++] = pal[v][0];
            out_[z++] = pal[v][1];
            out_[z++] = pal[v][2];
            if (target == 4) out_[z++] = 255;
            if (i+1 == cast(int) s.img_x) break;
            v = (bpp == 8) ? get8(s) : v2;
            out_[z++] = pal[v][0];
            out_[z++] = pal[v][1];
            out_[z++] = pal[v][2];
            if (target == 4) out_[z++] = 255;
         }
         skip(s, pad);
      }
   } else {
      int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
      int z = 0;
      int easy=0;
      skip(s, offset - 14 - hsz);
      if (bpp == 24) width = 3 * s.img_x;
      else if (bpp == 16) width = 2*s.img_x;
      else /* bpp = 32 and pad = 0 */ width=0;
      pad = (-width) & 3;
      if (bpp == 24) {
         easy = 1;
      } else if (bpp == 32) {
         if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
            easy = 2;
      }
      if (!easy) {
         if (!mr || !mg || !mb) { free(out_); throw new STBImageException("bad masks, corrupt BMP"); }
         // right shift amt to put high bit in position #7
         // each channel's width is taken from its own mask, so asymmetric
         // layouts such as 5-6-5 are widened correctly
         rshift = high_bit(mr)-7; rcount = bitcount(mr);
         gshift = high_bit(mg)-7; gcount = bitcount(mg);
         bshift = high_bit(mb)-7; bcount = bitcount(mb);
         ashift = high_bit(ma)-7; acount = bitcount(ma); // only used when ma != 0
      }
      for (j=0; j < cast(int) s.img_y; ++j) {
         if (easy) {
            for (i=0; i < cast(int) s.img_x; ++i) {
               int a;
               out_[z+2] = get8u(s);
               out_[z+1] = get8u(s);
               out_[z+0] = get8u(s);
               z += 3;
               a = (easy == 2 ? get8(s) : 255);
               if (target == 4) out_[z++] = cast(ubyte) a;
            }
         } else {
            for (i=0; i < cast(int) s.img_x; ++i) {
               uint v = (bpp == 16 ? get16le(s) : get32le(s));
               int a;
               out_[z++] = cast(ubyte) shiftsigned(v & mr, rshift, rcount);
               out_[z++] = cast(ubyte) shiftsigned(v & mg, gshift, gcount);
               out_[z++] = cast(ubyte) shiftsigned(v & mb, bshift, bcount);
               a = (ma ? shiftsigned(v & ma, ashift, acount) : 255);
               if (target == 4) out_[z++] = cast(ubyte) a;
            }
         }
         skip(s, pad);
      }
   }
   if (flip_vertically) {
      // swap row j with row (height-1-j) to turn the image top-down
      ubyte t;
      for (j=0; j < cast(int) s.img_y>>1; ++j) {
         ubyte *p1 = out_ + j *s.img_x*target;
         ubyte *p2 = out_ + (s.img_y-1-j)*s.img_x*target;
         for (i=0; i < cast(int) s.img_x*target; ++i) {
            t = p1[i], p1[i] = p2[i], p2[i] = t;
         }
      }
   }

   if (req_comp && req_comp != target) {
      out_ = convert_format(out_, target, req_comp, s.img_x, s.img_y);
      if (out_ == null) return out_; // convert_format frees input on failure
   }

   *x = s.img_x;
   *y = s.img_y;
   if (comp) *comp = s.img_n;
   return out_;
}

/// BMP entry point; forwards to bmp_load.
ubyte *stbi_bmp_load(stbi *s, int *x, int *y, int *comp, int req_comp)
{
   return bmp_load(s, x,y,comp,req_comp);
}

// *************************************************************************************************
// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb

/// One entry of the LZW code table used by the GIF decoder.
struct stbi_gif_lzw
{
   short prefix; // index of the preceding code, negative for a root code
   ubyte first;  // first byte of the string this code expands to
   ubyte suffix; // last byte of the string this code expands to
}

/// Decoder state for one GIF stream.
struct stbi_gif
{
   int w,h;
   ubyte *out_;                 // output buffer (always 4 components)
   int flags, bgindex, ratio, transparent, eflags;
   ubyte pal[256][4];
   ubyte lpal[256][4];
   stbi_gif_lzw codes[4096];
   ubyte *color_table;
   int parse, step;
   int lflags;
   int start_x, start_y;
   int max_x, max_y;
   int cur_x, cur_y;
   int line_size;
}

// throws STBImageException unless the stream starts with "GIF87a" or "GIF89a"
void
stbi_gif_test(stbi *s)
{
   int sz;
   if (get8(s) != 'G' || get8(s) != 'I' || get8(s) != 'F' || get8(s) != '8')
      throw new STBImageException("Couldn't decode GIF header");
   sz = get8(s);
   if (sz != '9' && sz != '7')
      throw new STBImageException("Couldn't decode GIF header");
   if (get8(s) != 'a')
      throw new STBImageException("Couldn't decode GIF header");
}

/// Reads `num_entries` colours from `s` into `pal`.
///
/// Each colour is three bytes in the stream, stored into slots 2, 1, 0 of
/// the entry in that order. Slot 3 (alpha) is 0 for the entry whose index
/// equals `transp` and 255 for every other entry; pass -1 for "no
/// transparent entry".
///
/// `pal` is taken by reference: D static arrays are value types, so a
/// by-value parameter would fill a temporary copy and leave the caller's
/// palette untouched.
void stbi_gif_parse_colortable(stbi *s, ref ubyte[4][256] pal, int num_entries, int transp)
{
   int i;
   for (i=0; i < num_entries; ++i) {
      pal[i][2] = get8u(s);
      pal[i][1] = get8u(s);
      pal[i][0] = get8u(s);
      pal[i][3] = transp == i ? 0 : 255;
   }
}

/// Parses the GIF signature and logical screen descriptor into `g`.
///
/// Params:
///   s       = input context.
///   g       = receives width, height, flags, background index and ratio;
///             `transparent` is reset to -1.
///   comp    = if non-null, set to 4.
///   is_info = when non-zero, return before reading the global colour table.
/// Returns: always 1.
/// Throws: STBImageException when the signature is not "GIF87a"/"GIF89a".
int stbi_gif_header(stbi *s, stbi_gif *g, int *comp, int is_info)
{
   ubyte version_;
   if (get8(s) != 'G' || get8(s) != 'I' || get8(s) != 'F' || get8(s) != '8')
      throw new STBImageException("not GIF, corrupt GIF");

   version_ = get8u(s);
   if (version_ != '7' && version_ != '9') throw new STBImageException("not GIF, corrupt GIF");
   if (get8(s) != 'a') throw new STBImageException("not GIF, corrupt GIF");

   g.w = get16le(s);
   g.h = get16le(s);
   g.flags = get8(s);
   g.bgindex = get8(s);
   g.ratio = get8(s);
   g.transparent = -1;

   if (comp != null) *comp = 4; // can't actually tell whether it's 3 or 4 until we parse the comments

   if (is_info) return 1;

   if (g.flags & 0x80)
      stbi_gif_parse_colortable(s,g.pal, 2 << (g.flags & 7), -1);

   return 1;
}

/// Writes the pixel string for LZW code `code` into g.out_ at the current
/// cursor and advances the cursor, moving to the next row (or the next
/// interlace pass) when the right edge of the frame is reached.
void stbi_out_gif_code(stbi_gif *g, ushort code)
{
   ubyte *p;
   ubyte *c;

   // recurse to decode the prefixes, since the linked-list is backwards,
   // and working backwards through an interleaved image would be nasty
   if (g.codes[code].prefix >= 0)
      stbi_out_gif_code(g, g.codes[code].prefix);

   if (g.cur_y >= g.max_y) return;

   p = (&g.out_[g.cur_x + g.cur_y]);
   c = &g.color_table[g.codes[code].suffix * 4];

   // entries with alpha below 128 leave the existing pixel untouched
   if (c[3] >= 128) {
      p[0] = c[2];
      p[1] = c[1];
      p[2] = c[0];
      p[3] = c[3];
   }
   g.cur_x += 4;

   if (g.cur_x >= g.max_x) {
      g.cur_x = g.start_x;
      g.cur_y += g.step;

      while (g.cur_y >= g.max_y && g.parse > 0) {
         g.step = (1 << g.parse) * g.line_size;
         g.cur_y = g.start_y + (g.step >> 1);
         --g.parse;
      }
   }
}

/// Decodes the LZW-compressed raster data of one frame from `s` into g.out_.
///
/// Returns: g.out_ once the end-of-stream code or a zero-length data block
///          is reached.
/// Throws: STBImageException on a missing clear code, a full code table or
///         an illegal code.
ubyte *stbi_process_gif_raster(stbi *s, stbi_gif *g)
{
   ubyte lzw_cs;
   int len, code;
   uint first;
   int codesize, codemask, avail, oldcode, bits, valid_bits, clear;
   stbi_gif_lzw *p;

   lzw_cs = get8u(s);
   clear = 1 << lzw_cs;
   first = 1;
   codesize = lzw_cs + 1;
   codemask = (1 << codesize) - 1;
   bits = 0;
   valid_bits = 0;
   for (code = 0; code < clear; code++) {
      g.codes[code].prefix = -1;
      g.codes[code].first = cast(ubyte) code;
      g.codes[code].suffix = cast(ubyte) code;
   }

   // support no starting clear code
   avail = clear+2;
   oldcode = -1;

   len = 0;
   for(;;) {
      if (valid_bits < codesize) {
         if (len == 0) {
            len = get8(s); // start new block
            if (len == 0)
               return g.out_;
         }
         --len;
         bits |= cast(int) get8(s) << valid_bits;
         valid_bits += 8;
      } else {
         int code_ = bits & codemask;
         bits >>= codesize;
         valid_bits -= codesize;
         // @OPTIMIZE: is there some way we can accelerate the non-clear path?
         if (code_ == clear) {  // clear code
            codesize = lzw_cs + 1;
            codemask = (1 << codesize) - 1;
            avail = clear + 2;
            oldcode = -1;
            first = 0;
         } else if (code_ == clear + 1) { // end of stream code
            skip(s, len);
            while ((len = get8(s)) > 0)
               skip(s,len);
            return g.out_;
         } else if (code_ <= avail) {
            if (first) throw new STBImageException("no clear code, corrupt GIF");

            if (oldcode >= 0) {
               // check the table limit before indexing so a full table is
               // reported as corrupt input instead of an out-of-range access
               if (avail >= 4096) throw new STBImageException("too many codes, corrupt GIF");
               p = &g.codes[avail++];
               p.prefix = cast(short) oldcode;
               p.first = g.codes[oldcode].first;
               p.suffix = (code_ == avail) ? p.first : g.codes[code_].first;
            } else if (code_ == avail)
               throw new STBImageException("illegal code in raster, corrupt GIF");

            // emit the code that was just read; the outer `code` is only
            // the table-initialisation counter and always equals `clear` here
            stbi_out_gif_code(g, cast(ushort) code_);

            if ((avail & codemask) == 0 && avail <= 0x0FFF) {
               codesize++;
               codemask = (1 << codesize) - 1;
            }

            oldcode = code_;
         } else {
            throw new STBImageException("illegal code in raster, corrupt GIF");
         }
      }
   }
}

/// Fills the whole g.w x g.h output buffer with the colour of the global
/// palette entry g.bgindex.
void stbi_fill_gif_background(stbi_gif *g)
{
   int i;
   ubyte *c = g.pal[g.bgindex].ptr;
   // @OPTIMIZE: write a dword at a time
   for (i = 0; i < g.w * g.h * 4; i += 4) {
      ubyte *p = &g.out_[i];
      p[0] = c[2];
      p[1] = c[1];
      p[2] = c[0];
      p[3] = c[3];
   }
}

// this function is designed to support animated gifs, although stb_image doesn't support it
/// Decodes the next frame of the GIF in `s`.
///
/// On the first call (g.out_ is null) the header is parsed and the output
/// buffer is allocated and filled with the background colour.
///
/// Returns: the frame pixels (converted to `req_comp` components when that
///          is non-zero and not 4), null on failure, or `cast(ubyte*) 1`
///          when the stream terminator is read.
/// Throws: STBImageException on corrupt input or allocation failure.
ubyte *stbi_gif_load_next(stbi *s, stbi_gif *g, int *comp, int req_comp)
{
   int i;
   ubyte *old_out = null;

   if (g.out_ == null) {
      if (!stbi_gif_header(s, g, comp,0)) return null; // failure_reason set by stbi_gif_header
      g.out_ = cast(ubyte*) malloc(4 * g.w * g.h);
      if (g.out_ == null) throw new STBImageException("Out of memory");
      stbi_fill_gif_background(g);
   } else {
      // animated-gif-only path
      if (((g.eflags & 0x1C) >> 2) == 3) {
         old_out = g.out_;
         g.out_ = cast(ubyte*) malloc(4 * g.w * g.h);
         if (g.out_ == null) throw new STBImageException("Out of memory");
         memcpy(g.out_, old_out, g.w*g.h*4);
      }
   }

   for (;;) {
      switch (get8(s)) {
         case 0x2C: /* Image Descriptor */
         {
            int x, y, w, h;
            ubyte *o;

            x = get16le(s);
            y = get16le(s);
            w = get16le(s);
            h = get16le(s);
            if (((x + w) > (g.w)) || ((y + h) > (g.h)))
               throw new STBImageException("bad Image Descriptor, corrupt GIF");

            // all cursor positions are byte offsets into the 4-component buffer
            g.line_size = g.w * 4;
            g.start_x = x * 4;
            g.start_y = y * g.line_size;
            g.max_x   = g.start_x + w * 4;
            g.max_y   = g.start_y + h * g.line_size;
            g.cur_x   = g.start_x;
            g.cur_y   = g.start_y;

            g.lflags = get8(s);

            if (g.lflags & 0x40) {
               g.step = 8 * g.line_size; // first interlaced spacing
               g.parse = 3;
            } else {
               g.step = g.line_size;
               g.parse = 0;
            }

            if (g.lflags & 0x80) {
               stbi_gif_parse_colortable(s,g.lpal, 2 << (g.lflags & 7), g.eflags & 0x01 ? g.transparent : -1);
               g.color_table = &g.lpal[0][0];
            } else if (g.flags & 0x80) {
               for (i=0; i < 256; ++i)  // @OPTIMIZE: reset only the previous transparent
                  g.pal[i][3] = 255;
               if (g.transparent >= 0 && (g.eflags & 0x01))
                  g.pal[g.transparent][3] = 0;
               g.color_table = &g.pal[0][0];
            } else
               throw new STBImageException("missing color table, corrupt GIF");

            o = stbi_process_gif_raster(s, g);
            if (o == null) return null;

            if (req_comp && req_comp != 4)
               o = convert_format(o, 4, req_comp, g.w, g.h);
            return o;
         }

         case 0x21: // Comment Extension.
         {
            int len;
            if (get8(s) == 0xF9) { // Graphic Control Extension.
               len = get8(s);
               if (len == 4) {
                  g.eflags = get8(s);
                  get16le(s); // delay
                  g.transparent = get8(s);
               } else {
                  skip(s, len);
                  break;
               }
            }
            // skip the remaining sub-blocks up to the zero-length terminator
            while ((len = get8(s)) != 0)
               skip(s, len);
            break;
         }

         case 0x3B: // gif stream termination code
            return cast(ubyte*) 1;

         default:
            throw new STBImageException("unknown code, corrupt GIF");
      }
   }
}

/// GIF entry point: decodes the first frame of the GIF in `s`.
///
/// Params:
///   s        = input context.
///   x, y     = receive the logical screen width and height on success.
///   comp     = forwarded to stbi_gif_load_next.
///   req_comp = requested number of output components (0 keeps 4).
/// Returns: the malloc'ed pixel buffer, or null when the stream holds no frame.
ubyte *stbi_gif_load(stbi *s, int *x, int *y, int *comp, int req_comp)
{
   ubyte *u = null;
   stbi_gif g={0};

   u = stbi_gif_load_next(s, &g, comp, req_comp);
   if (u == cast(void *) 1) {
      // end of animated gif marker: no frame was produced, so the canvas
      // allocated by stbi_gif_load_next has no owner and must be released
      free(g.out_);
      g.out_ = null;
      u = null;
   }
   if (u) {
      *x = g.w;
      *y = g.h;
   }

   return u;
}
