1 module gfm.core.text;
2 
3 import std.file,
4        std.utf,
5        std.conv,
6        std.encoding,
7        std.array,
8        std.c..string;
9 
10 import std.experimental.logger;
11 
12 
/// Loads the whole content of a text file in a single call.
/// Params:
///     filename = Path of the file to load.
/// Returns: A one-element array holding the file content, or an empty array
///          when reading fails with a FileException or a UTFException.
/// Bugs: Remove in favor of std.file.read.
///       This means shaders must compile from a single string instead of string[].
string[] readTextFile(string filename)
{
    string[] content;

    try
    {
        // The entire file becomes the single element of the result.
        content = [ readText(filename) ];
    }
    catch(UTFException)
    {
        // File was read but its content failed UTF validation.
        content = [];
    }
    catch(FileException)
    {
        // File could not be read.
        content = [];
    }

    return content;
}
32 
/// Sanitize a C string from a library, without reporting anything.
/// Forwards to the three-argument overload with no logger and no source name.
/// Params:
///     inputZ = Zero-terminated C string.
/// Returns: Sanitized UTF-8 string. A byte that cannot be decoded as UTF-8 is
///          re-encoded as the code point that has the same numeric value.
string sanitizeUTF8(const(char*) inputZ)
{
    Logger noLogger = null; // nothing will be warned about
    string noSource = null; // only used in the warning message, hence unused here
    return sanitizeUTF8(inputZ, noLogger, noSource);
}
39 
/// Sanitize a C string from a library.
///
/// Every well-formed UTF-8 sequence is copied through unchanged. A byte that does
/// not start a well-formed sequence is not dropped: it is re-encoded as the code
/// point that has the same numeric value, and scanning resumes at the next byte.
///
/// Params:
///     inputZ = Zero-terminated C string, must not be null.
///     logger = Receives a single warning if at least one invalid sequence was
///              found. May be null, in which case nothing is logged.
///     source = Name of the origin of the string, only used in the warning message.
/// Returns: Sanitized UTF-8 string.
string sanitizeUTF8(const(char*) inputZ, Logger logger, string source)
{
    // Scoped import so this function does not depend on a module-level C binding.
    import core.stdc.string : strlen;

    assert(inputZ != null);
    const(char)[] input = inputZ[0 .. strlen(inputZ)];

    auto result = appender!string();
    result.reserve(input.length);

    bool foundInvalid = false;

    size_t i = 0;
    while (i < input.length)
    {
        immutable size_t start = i;

        // std.utf is spelled out because std.encoding also exposes decode/encode.
        bool wellFormed = true;
        try
        {
            cast(void) std.utf.decode(input, i);
        }
        catch (UTFException)
        {
            wellFormed = false;
        }

        if (wellFormed)
        {
            // A sequence accepted by decode is already canonical UTF-8,
            // so the original bytes can be appended as they are.
            result.put(input[start .. i]);
        }
        else
        {
            foundInvalid = true;

            // Do not rely on what decode did to the index before throwing:
            // consume exactly one byte and carry on after it.
            i = start + 1;

            char[4] dst;
            immutable len = std.utf.encode(dst, cast(dchar) input[start]);
            result.put(dst[0 .. len]);
        }
    }

    // optionally, warn that input had invalid UTF-8
    if (foundInvalid && logger !is null)
        logger.warningf("got invalid UTF-8 sequence from %s", source);

    return result.data;
}
76