module gfm.core.text;

import std.file,
       std.utf,
       std.conv,
       std.encoding,
       std.array,
       core.stdc.string; // strlen; replaces the removed std.c.string module

import std.experimental.logger;


/// Reads a whole text file at once.
/// Params:
///     filename = path of the file to read.
/// Returns: A one-element array holding the file content, or an empty array
///          if the file could not be read or is not valid UTF-8.
/// Bugs: Remove in favor of std.file.read.
///       This means shaders must compile from a single string instead of string[].
string[] readTextFile(string filename)
{
    try
    {
        string data = readText(filename);
        return [data];
    }
    catch(FileException e)
    {
        // Best-effort API: an unreadable file is reported as "no content".
        return [];
    }
    catch(UTFException e)
    {
        // readText validates the content; invalid UTF is also "no content".
        return [];
    }
}

/// Sanitize a C string from a library.
/// Params:
///     inputZ = zero-terminated C string, must not be null.
/// Returns: Sanitized UTF-8 string. Invalid UTF-8 sequences are replaced by question marks.
string sanitizeUTF8(const(char*) inputZ)
{
    return sanitizeUTF8(inputZ, null, null);
}

/// Sanitize a C string from a library.
/// Params:
///     inputZ = zero-terminated C string, must not be null.
///     logger = optional logger; when non-null, one warning is emitted if the
///              input contained at least one invalid UTF-8 sequence.
///     source = name of the string's origin, used only in the warning message.
/// Returns: Sanitized UTF-8 string. Each invalid code unit is replaced by a
///          question mark; invalid input additionally generates a warning message.
string sanitizeUTF8(const(char*) inputZ, Logger logger, string source)
{
    assert(inputZ !is null);

    // Slice the C string once; decode only reads it, so no cast to immutable is needed.
    const(char)[] input = inputZ[0 .. strlen(inputZ)];

    auto result = appender!string();
    result.reserve(input.length);

    bool foundInvalid = false;

    size_t i = 0;
    while(i < input.length)
    {
        dchar ch;
        try
        {
            // On success, decode advances i past the decoded code point.
            ch = std.utf.decode(input, i);
        }
        catch(UTFException)
        {
            // decode leaves i untouched when it throws: skip exactly one
            // offending code unit and emit the documented replacement.
            foundInvalid = true;
            ch = '?';
            ++i;
        }
        result.put(ch); // Appender re-encodes the dchar as UTF-8
    }

    // optionally, warn that input had invalid UTF-8
    if (foundInvalid && logger !is null)
        logger.warningf("got invalid UTF-8 sequence from %s", source);

    return result.data;
}