module gfm.net.uri;

import std.range,
       std.string,
       std.ascii,
       std.socket;



/// Exception thrown when an URI doesn't parse.
class URIException : Exception
{
    public
    {
        /// Forwards all arguments to the $(D Exception) constructor.
        @safe pure nothrow this(string message, string file =__FILE__, size_t line = __LINE__, Throwable next = null)
        {
            super(message, file, line, next);
        }
    }
}

/**

  An attempt at implementing URI (RFC 3986).

  All constructed URI are valid and normalized.
  Bugs:
  $(UL
      $(LI Separate segments in parsed form.)
      $(LI Relative URL combining.)
      $(LI . and .. normalization.)
  )

  Alternative:
      Consider using $(WEB vibed.org,vibe.d) if you need something better.
 */
class URI
{
    public
    {
        /// Kind of host found in the authority part.
        enum HostType
        {
            NONE,
            REG_NAME, /// Host has a registered name.
            IPV4,     /// Host has an IPv4
            IPV6,     /// Host has an IPv6
            IPVFUTURE /// Unknown yet scheme.
        }

        /// Creates an URI from an input range, throws if invalid.
        /// Input should be an ENCODED url range.
        /// Throws: $(D URIException) if the URI is invalid.
        this(T)(T input) if (isForwardRange!T)
        {
            _scheme = null;
            _hostType = HostType.NONE;
            _hostName = null;
            _port = -1;
            _userInfo = null;
            _path = null;
            _query = null;
            _fragment = null;
            parseURI(input);
        }

        /// Checks URI validity.
        /// Returns: true if input is valid.
        static bool isValid(T)(T input) /* pure */ nothrow
        {
            try
            {
                try
                {
                    URI uri = new URI(input);
                    return true;
                }
                catch (URIException e)
                {
                    return false;
                }
            }
            catch (Exception e)
            {
                assert(false); // came here? Fix the library by writing the missing catch-case.
            }
        }

        // getters for normalized URI components

        /// Returns: URI scheme, guaranteed not null.
        string scheme() pure const nothrow
        {
            return _scheme;
        }

        /// Returns: Host name, or null if not available.
        string hostName() pure const nothrow
        {
            return _hostName;
        }

        /// Returns: Host type (HostType.NONE if not available).
        HostType hostType() pure const nothrow
        {
            return _hostType;
        }

        /**
         * Returns: port number.
         * If none is provided by the URI, return the default port for this scheme.
         * If the scheme isn't recognized, return -1.
         */
        int port() pure const nothrow
        {
            if (_port != -1)
                return _port;

            foreach (e; knownSchemes)
                if (e.scheme == _scheme)
                    return e.defaultPort;

            return -1;
        }

        /// Returns: User-info part of the URI, or null if not available.
        string userInfo() pure const nothrow
        {
            return _userInfo;
        }

        /// Returns: Path part of the URI, never null, can be the empty string.
        string path() pure const nothrow
        {
            return _path;
        }

        /// Returns: Query part of the URI, or null if not available.
        string query() pure const nothrow
        {
            return _query;
        }

        /// Returns: Fragment part of the URI, or null if not available.
        string fragment() pure const nothrow
        {
            return _fragment;
        }

        /// Returns: Authority part of the URI ("userinfo@host:port"), or null
        /// if the URI has no host. The port only appears when the URI gave one.
        string authority() pure const nothrow
        {
            if (_hostName is null)
                return null;

            string res = "";
            if (_userInfo !is null)
                res = res ~ _userInfo ~ "@";
            res ~= _hostName;
            if (_port != -1)
                res = res ~ ":" ~ itos(_port);
            return res;
        }

        /// Resolves URI host name.
        /// Returns: std.socket.Address from the URI.
        /// Throws: $(D URIException) if the host is absent or of an unknown future kind.
        // NOTE(review): when port() is -1 (no port, unknown scheme) the cast below yields 65535.
        Address resolveAddress()
        {
            final switch(_hostType)
            {
                case HostType.REG_NAME:
                case HostType.IPV4:
                    return new InternetAddress(_hostName, cast(ushort)port());

                case HostType.IPV6:
                    return new Internet6Address(_hostName, cast(ushort)port());

                case HostType.IPVFUTURE:
                case HostType.NONE:
                    throw new URIException("Cannot resolve such host");
            }
        }

        /// Returns: Pretty string representation, built from the normalized components.
        override string toString() const
        {
            string res = _scheme ~ ":";

            // the "//" authority prefix only exists when the URI has a host
            if (_hostName !is null)
                res = res ~ "//" ~ authority();
            res ~= _path;
            if (_query !is null)
                res = res ~ "?" ~ _query;
            if (_fragment !is null)
                res = res ~ "#" ~ _fragment;
            return res;
        }

        /// Semantic comparison of two URIs.
        /// They are equals if they have the same normalized string representation.
        /// A null reference never compares equal.
        bool opEquals(U)(U other) const if (is(U == class) && is(U : const(URI)))
        {
            if (other is null)
                return false;
            return toString() == other.toString();
        }
    }

    private
    {
        // normalized URI components
        string _scheme;     // never null, never empty
        string _userInfo;   // can be null
        HostType _hostType; // what the hostname string is (NONE if no host in URI)
        string _hostName;   // null if no authority in URI
        int _port;          // -1 if no port in URI
        string _path;       // never null, but could be empty
        string _query;      // can be null
        string _fragment;   // can be null

        // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
        void parseURI(T)(ref T input)
        {
            _scheme = toLower(parseScheme(input));
            consume(input, ':');
            parseHierPart(input);

            if (input.empty)
                return;

            char c = popChar(input);

            if (c == '?')
            {
                _query = parseQuery(input);

                if (input.empty)
                    return;

                c = popChar(input);
            }

            // whatever stopped the previous component must start a fragment,
            // otherwise a stray character would be silently dropped
            if (c != '#')
                throw new URIException("unexpected characters at end of URI");

            _fragment = parseFragment(input);

            if (!input.empty)
                throw new URIException("unexpected characters at end of URI");
        }

        // scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
        string parseScheme(T)(ref T input)
        {
            string result = "";
            char c = popChar(input);
            if (!isAlpha(c))
                throw new URIException("expected alpha character in URI scheme");

            result ~= c;

            while(!input.empty)
            {
                c = peekChar(input);

                if (isAlpha(c) || isDigit(c) || "+-.".contains(c))
                {
                    result ~= c;
                    input.popFront();
                }
                else
                    break;
            }
            return result;
        }

        // hier-part     = "//" authority path-abempty
        //               / path-absolute
        //               / path-rootless
        //               / path-empty
        void parseHierPart(T)(ref T input)
        {
            _path = ""; // path-empty until proven otherwise; the path is never null

            if (input.empty())
                return; // path-empty

            char c = peekChar(input);
            if (c == '?' || c == '#')
                return; // path-empty directly followed by a query or a fragment

            if (c == '/')
            {
                // remember the position BEFORE the slash: path-absolute re-reads it
                T sinput = input.save;
                input.popFront();
                if (!input.empty() && peekChar(input) == '/')
                {
                    consume(input, '/');
                    parseAuthority(input);
                    _path = parseAbEmpty(input);
                }
                else
                {
                    input = sinput.save;
                    _path = parsePathAbsolute(input);
                }
            }
            else
            {
                _path = parsePathRootless(input);
            }
        }

        // authority     = [ userinfo "@" ] host [ ":" port ]
        void parseAuthority(T)(ref T input)
        {
            // trying to parse user
            T uinput = input.save;
            try
            {
                _userInfo = parseUserinfo(input);
                consume(input, '@');
            }
            catch(URIException e)
            {
                // no user name in URI
                _userInfo = null;
                input = uinput.save;
            }

            parseHost(input, _hostName, _hostType);

            if (!empty(input) && peekChar(input) == ':')
            {
                consume(input, ':');
                _port = parsePort(input);
            }
        }

        // Reads a run of pchar-like characters, decoding percent-encoded ones.
        // The flags widen the accepted set with ':', '@' and '/' '?'.
        // Stops (without consuming) at the first character outside the set.
        string parsePcharString(T)(ref T input, bool allowColon, bool allowAt, bool allowSlashQuestionMark)
        {
            string res = "";

            while(!input.empty)
            {
                char c = peekChar(input);

                if (isUnreserved(c) || isSubDelim(c))
                    res ~= popChar(input);
                else if (c == '%')
                    res ~= parsePercentEncodedChar(input);
                else if (c == ':' && allowColon)
                    res ~= popChar(input);
                else if (c == '@' && allowAt)
                    res ~= popChar(input);
                else if ((c == '?' || c == '/') && allowSlashQuestionMark)
                    res ~= popChar(input);
                else
                    break;
            }
            return res;
        }


        // host = IP-literal / IPv4address / reg-name
        void parseHost(T)(ref T input, out string res, out HostType hostType)
        {
            char c = peekChar(input);
            if (c == '[')
                parseIPLiteral(input, res, hostType);
            else
            {
                T iinput = input.save;
                try
                {
                    string ipv4 = parseIPv4Address(input);

                    // "1.2.3.4.example.org" starts like an IPv4 address but is a
                    // registered name: the address must not be followed by more name characters
                    if (!input.empty)
                    {
                        char next = peekChar(input);
                        if (isUnreserved(next) || isSubDelim(next) || next == '%')
                            throw new URIException("not an IPv4 address");
                    }

                    hostType = HostType.IPV4;
                    res = ipv4;
                }
                catch (URIException e)
                {
                    input = iinput.save;
                    hostType = HostType.REG_NAME;
                    res = toLower(parseRegName(input));
                }
            }
        }

        // IP-literal = "[" ( IPv6address / IPvFuture  ) "]"
        void parseIPLiteral(T)(ref T input, out string res, out HostType hostType)
        {
            consume(input, '[');
            if (peekChar(input) == 'v')
            {
                hostType = HostType.IPVFUTURE;
                res = parseIPv6OrFutureAddress(input);
            }
            else
            {
                hostType = HostType.IPV6;
                string ipv6 = parseIPv6OrFutureAddress(input);

                // validate and expand IPv6 (for normalization to be effective for comparisons)
                try
                {
                    ubyte[16] bytes = Internet6Address.parse(ipv6);
                    res = "";
                    foreach (i ; 0..16)
                    {
                        if ((i & 1) == 0 && i != 0)
                            res ~= ":";
                        res ~= format("%02x", bytes[i]);
                    }
                }
                catch(SocketException e)
                {
                    // IPv6 address did not parse
                    throw new URIException(e.msg);
                }
            }
            consume(input, ']');
        }

        // Reads everything up to (not including) the closing ']'.
        // peekChar throws if the input ends before the bracket.
        string parseIPv6OrFutureAddress(T)(ref T input)
        {
            string res = "";
            while (peekChar(input) != ']')
                res ~= popChar(input);
            return res;
        }

        // IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
        string parseIPv4Address(T)(ref T input)
        {
            int a = parseDecOctet(input);
            consume(input, '.');
            int b = parseDecOctet(input);
            consume(input, '.');
            int c = parseDecOctet(input);
            consume(input, '.');
            int d = parseDecOctet(input);
            return format("%s.%s.%s.%s", a, b, c, d);
        }

        // dec-octet     = DIGIT                 ; 0-9
        //               / %x31-39 DIGIT         ; 10-99
        //               / "1" 2DIGIT            ; 100-199
        //               / "2" %x30-34 DIGIT     ; 200-249
        //               / "25" %x30-35          ; 250-255
        int parseDecOctet(T)(ref T input)
        {
            int res = popDigit(input);

            if (!input.empty && isDigit(peekChar(input)))
            {
                res = 10 * res + popDigit(input);

                if (!input.empty && isDigit(peekChar(input)))
                    res = 10 * res + popDigit(input);
            }

            if (res > 255)
                throw new URIException("out of range number in IPv4 address");

            return res;
        }

        // query = *( pchar / "/" / "?" )
        string parseQuery(T)(ref T input)
        {
            return parsePcharString(input, true, true, true);
        }

        // fragment = *( pchar / "/" / "?" )
        string parseFragment(T)(ref T input)
        {
            return parsePcharString(input, true, true, true);
        }

        // pct-encoded = "%" HEXDIG HEXDIG
        char parsePercentEncodedChar(T)(ref T input)
        {
            consume(input, '%');

            int char1Val = hexValue(popChar(input));
            int char2Val = hexValue(popChar(input));

            // hexValue reports -1 for anything that is not a hexadecimal digit
            if (char1Val == -1 || char2Val == -1)
                throw new URIException("expected two hexadecimal digits after '%'");

            return cast(char)(char1Val * 16 + char2Val);
        }

        // userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
        string parseUserinfo(T)(ref T input)
        {
            return parsePcharString(input, true, false, false);
        }

        // reg-name = *( unreserved / pct-encoded / sub-delims )
        string parseRegName(T)(ref T input)
        {
            return parsePcharString(input, false, false, false);
        }

        // port = *DIGIT
        // Returns -1 (no port) when there is no digit after the colon.
        int parsePort(T)(ref T input)
        {
            int res = 0;
            bool hasDigit = false;

            while(!input.empty)
            {
                char c = peekChar(input);
                if (!isDigit(c))
                    break;
                res = res * 10 + popDigit(input);
                hasDigit = true;

                // checked on every digit so that the accumulator can never overflow
                if (res > 65535)
                    throw new URIException("port number out of range");
            }
            return hasDigit ? res : -1;
        }

        // segment       = *pchar
        // segment-nz    = 1*pchar
        // segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
        string parseSegment(T)(ref T input, bool allowZero, bool allowColon)
        {
            string res = parsePcharString(input, allowColon, true, false);
            if (!allowZero && res == "")
                throw new URIException("expected a non-zero segment in URI");
            return res;
        }

        // path-abempty  = *( "/" segment )
        string parseAbEmpty(T)(ref T input)
        {
            string res = "";
            while (!input.empty)
            {
                if (peekChar(input) != '/')
                    break;
                consume(input, '/');
                res = res ~ "/" ~ parseSegment(input, true, true);
            }
            return res;
        }

        // path-absolute = "/" [ segment-nz *( "/" segment ) ]
        string parsePathAbsolute(T)(ref T input)
        {
            consume(input, '/');
            string res = "/";

            // An empty first segment means the path is just "/". This is tested
            // explicitly rather than by catching an exception, so that genuine
            // errors (bad percent-encoding, non-ASCII input) still propagate.
            string first = parseSegment(input, true, true);
            if (first == "")
                return res;

            res ~= first;
            res ~= parseAbEmpty(input);
            return res;
        }

        // Shared implementation of path-noscheme and path-rootless.
        string parsePathNoSlash(T)(ref T input, bool allowColonInFirstSegment)
        {
            string res = parseSegment(input, false, allowColonInFirstSegment);
            res ~= parseAbEmpty(input);
            return res;
        }

        // path-noscheme = segment-nz-nc *( "/" segment )
        string parsePathNoScheme(T)(ref T input)
        {
            return parsePathNoSlash(input, false);
        }

        // path-rootless = segment-nz *( "/" segment )
        string parsePathRootless(T)(ref T input)
        {
            return parsePathNoSlash(input, true);
        }
    }
}

private pure
{
    // Returns: true if c is one of the characters of s.
    bool contains(string s, char c) nothrow
    {
        foreach(char sc; s)
            if (c == sc)
                return true;
        return false;
    }

    bool isAlpha(char c) nothrow
    {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }

    bool isDigit(char c) nothrow
    {
        return c >= '0' && c <= '9';
    }

    bool isHexDigit(char c) nothrow
    {
        return hexValue(c) != -1;
    }

    // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
    bool isUnreserved(char c) nothrow
    {
        return isAlpha(c) || isDigit(c) || "-._~".contains(c);
    }

    // reserved = gen-delims / sub-delims
    bool isReserved(char c) nothrow
    {
        return isGenDelim(c) || isSubDelim(c);
    }

    bool isGenDelim(char c) nothrow
    {
        return ":/?#[]@".contains(c);
    }

    bool isSubDelim(char c) nothrow
    {
        return "!$&'()*+,;=".contains(c);
    }

    // Returns: numeric value (0 to 15) of a hexadecimal digit, -1 for any other character.
    int hexValue(char c) nothrow
    {
        if (isDigit(c))
            return c - '0';
        else if (c >= 'a' && c <= 'f')
            return c - 'a' + 10;
        else if (c >= 'A' && c <= 'F')
            return c - 'A' + 10;
        else
            return -1;
    }

    // peek char from input range, or throw
    char peekChar(T)(ref T input)
    {
        if (input.empty())
            throw new URIException("expected character");

        dchar c = input.front;

        // rejects code points 127 and above
        if (cast(int)c >= 127)
            throw new URIException("US-ASCII character expected");

        return cast(char)c;
    }

    // pop char from input range, or throw
    char popChar(T)(ref T input)
    {
        char result = peekChar(input);
        input.popFront();
        return result;
    }

    // pop a decimal digit from input range and return its value, or throw
    int popDigit(T)(ref T input)
    {
        char c = popChar(input);
        if (!isDigit(c))
            throw new URIException("expected digit character");
        return hexValue(c);
    }

    // pop one char and throw unless it is the expected one
    void consume(T)(ref T input, char expected)
    {
        char c = popChar(input);
        if (c != expected)
            throw new URIException("expected '" ~ expected ~ "' character");
    }

    // Decimal representation of i; only used here with non-negative port numbers.
    string itos(int i) pure nothrow
    {
        string res = "";
        do
        {
            res = cast(char)('0' + (i % 10)) ~ res;
            i = i / 10;
        } while (i != 0);
        return res;
    }

    // Association of a lowercase scheme name with its default port.
    struct KnownScheme
    {
        string scheme;
        int defaultPort;
    }

    enum knownSchemes =
    [
        KnownScheme("ftp", 21),
        KnownScheme("sftp", 22),
        KnownScheme("telnet", 23),
        KnownScheme("smtp", 25),
        KnownScheme("gopher", 70),
        KnownScheme("http", 80),
        KnownScheme("nntp", 119),
        KnownScheme("https", 443)
    ];

}

unittest
{

    {
        string s = "HTTP://machin@fr.wikipedia.org:80/wiki/Uniform_Resource_Locator?Query%20Part=4#fragment%20part";
        assert(URI.isValid(s));
        auto uri = new URI(s);
        assert(uri.scheme() == "http");
        assert(uri.userInfo() == "machin");
        assert(uri.hostName() == "fr.wikipedia.org");
        assert(uri.port() == 80);
        assert(uri.authority() == "machin@fr.wikipedia.org:80");
        assert(uri.path() == "/wiki/Uniform_Resource_Locator");
        assert(uri.query() == "Query Part=4");
        assert(uri.fragment() == "fragment part");
    }

    // host tests
    {
        assert((new URI("http://truc.org")).hostType() == URI.HostType.REG_NAME);
        assert((new URI("http://127.0.0.1")).hostType() == URI.HostType.IPV4);
        assert((new URI("http://[2001:db8::7]")).hostType() == URI.HostType.IPV6);
        assert((new URI("http://[v9CrazySchemeFromOver9000year]")).hostType() == URI.HostType.IPVFUTURE);
        assert((new URI("http://1.2.3.4.example.org")).hostType() == URI.HostType.REG_NAME);
    }

    // string representation and comparison
    {
        assert((new URI("http://example.com/a?b#c")).toString() == "http://example.com/a?b#c");
        assert((new URI("mailto:John.Doe@example.com")).toString() == "mailto:John.Doe@example.com");
        assert((new URI("HTTP://Example.COM/x")).opEquals(new URI("http://example.com/x")));
        assert(!(new URI("http://example.com/x")).opEquals(new URI("http://example.com/y")));
    }

    // percent-decoding, paths and ports
    {
        assert((new URI("http://example.com/%4A%4a")).path() == "/JJ");
        assert((new URI("file:/etc/passwd")).path() == "/etc/passwd");
        assert((new URI("about:")).path() !is null);
        assert((new URI("about:?x")).query() == "x");
        assert((new URI("http://example.com:/")).port() == 80);
        assert(!URI.isValid("http://example.com/%zz"));
        assert(!URI.isValid("http://example.com/ "));
        assert(!URI.isValid("http://example.com:99999/"));
    }

    auto wellFormedURIs =
    [
        "ftp://ftp.rfc-editor.org/in-notes/rfc2396.txt",
        "mailto:Quidam.no-spam@example.com",
        "news:fr.comp.infosystemes.www.auteurs",
        "gopher://gopher.quux.org/",
        "http://Jojo:lApIn@www.example.com:8888/chemin/d/acc%C3%A8s.php?q=req&q2=req2#signet",
        "ldap://[2001:db8::7]/c=GB?objectClass?one",
        "mailto:John.Doe@example.com",
        "tel:+1-816-555-1212",
        "telnet://192.0.2.16:80/",
        "urn:oasis:names:specification:docbook:dtd:xml:4.1.2",
        "about:",
    ];

    foreach (wuri; wellFormedURIs)
    {
        bool valid = URI.isValid(wuri);
        assert(valid);
    }
}