1 // taken from https://github.com/ldc-developers/ldc/blob/829dc71114eaf7c769208f03eb9a614dafd789c3/driver/config.d
2 // License: three-clause BSD (https://github.com/ldc-developers/ldc/blob/master/LICENSE)
3 // Authors: Martin Kinkelin (@kinke), Remi Thebault (@rtbo)
4 
5 //===-- driver/config.d - LDC config file parsing -----------------*- D -*-===//
6 //
7 //                         LDC – the LLVM D compiler
8 //
9 // This file is distributed under the BSD-style LDC license. See the LICENSE
10 // file for details.
11 //
12 //===----------------------------------------------------------------------===//
13 //
14 // Parsing engine for the LDC config file (ldc2.conf).
15 //
16 //===----------------------------------------------------------------------===//
17 module external.ldc.config;
18 
19 import core.stdc.ctype;
20 import core.stdc.stdio;
21 import core.stdc.string;
22 
23 
/// Base class of all config entries: a name paired with a tag telling
/// which kind of value the entry carries.
class Setting
{
    /// The possible kinds of setting.
    enum Type
    {
        scalar,
        array,
        group,
    }

    private
    {
        string _name;
        Type _type;
    }

    /// Creates a setting called `name` of kind `type`.
    this(string name, Type type)
    {
        this._name = name;
        this._type = type;
    }

    /// The name passed to the constructor.
    @property string name() const
    {
        return this._name;
    }

    /// The kind passed to the constructor.
    @property Type type() const
    {
        return this._type;
    }
}
52 
53 
/// A setting whose value is a single string.
class ScalarSetting : Setting
{
    private string _val;

    /// Creates a scalar setting called `name` holding `val`.
    this(string name, string val)
    {
        super(name, Type.scalar);
        this._val = val;
    }

    /// The string value passed to the constructor.
    @property string val() const
    {
        return this._val;
    }
}
69 
70 
/// A setting whose value is a list of strings.
class ArraySetting : Setting
{
    private string[] _vals;

    /// Creates an array setting called `name` holding `vals`.
    /// The slice is stored as is, not duplicated.
    this(string name, string[] vals)
    {
        super(name, Type.array);
        this._vals = vals;
    }

    /// The values passed to the constructor, as a read-only view.
    @property const(string)[] vals() const
    {
        return this._vals;
    }
}
86 
/// A setting whose value is a list of nested settings.
class GroupSetting : Setting
{
    private Setting[] _children;

    /// Creates a group setting called `name` containing `children`.
    /// The slice is stored as is, not duplicated.
    this(string name, Setting[] children)
    {
        super(name, Type.group);
        this._children = children;
    }

    /// The nested settings passed to the constructor, as a read-only view.
    @property const(Setting)[] children() const
    {
        return this._children;
    }
}
102 
103 
/// Reads the file `filename` as text and parses it into its top-level
/// settings. A leading UTF-8 byte order mark is dropped before parsing.
Setting[] parseConfigFile(string filename)
{
    import std.file : readText;

    enum utf8Bom = "\xEF\xBB\xBF";

    string text = readText(filename);

    // skip UTF-8 BOM
    if (text.length >= utf8Bom.length && text[0 .. utf8Bom.length] == utf8Bom)
        text = text[utf8Bom.length .. $];

    auto configParser = Parser(text, filename);
    return configParser.parseConfig();
}
117 
118 
119 private:
120 
121 /+
122 
123 What follows is a recursive descent parser that reads the following
124 EBNF grammar.
125 It is a subset of the libconfig grammar (http://www.hyperrealm.com/libconfig).
126 
127 config  =   { ows , setting } , ows ;
128 setting =   (name | string) , (":" | "=") , value , [";" | ","] ;
129 name    =   alpha , { alpha | digit | "_" | "-" } ;
130 value   =   string | array | group ;
array   =   "[" , ows ,
                [ string , ows , { "," , ows , string , ows } , [ "," , ows ] ] ,
            "]" ;
134 group   =   "{" , ows , { setting , ows } , "}" ;
135 string  =   ( quotstr , { ows , quotstr } ) |
136             ( btstr , { ows, btstr } ) ;
137 quotstr =   '"' , { ? any char but '"', '\n' and '\r' ? | escseq } , '"' ;
escseq  =   "\" , ( "\" | '"' | "r" | "n" | "t" ) ;
139 btstr   =   '`' , { ? any char but '`' ? } , '`' ;
140 alpha   =   ? any char between "a" and "z" included
141                     or between "A" and "Z" included ? ;
142 digit   =   ? any char between "0" and "9" included ? ;
143 ows     =   [ ws ] ; (* optional white space *)
144 ws      =   ? white space (space, tab, line feed ...) ? ;
145 
146 
147 Single line comments are also supported. They start with "//" and span until
148 line feed.
149 The "//" sequence is however allowed within strings and doesn't need to be
150 escaped.
White space is significant only within strings.
152 Physical line feeds are not allowed within strings. To span a string over
153 multiple lines, use concatenation ("hello " "world" == "hello world").
154 The following escape sequences are allowed in strings:
155   - \\
156   - \"
157   - \r
158   - \n
159   - \t
160 
161 +/
162 
/// Kinds of token handed out by the tokenizer.
enum Token
{
    name,
    assign,     /// ':' or '='
    str,
    lbrace,     /// '{'
    rbrace,     /// '}'
    lbracket,   /// '['
    rbracket,   /// ']'
    semicolon,  /// ';'
    comma,      /// ','
    unknown,
    eof,
}
177 
/// Returns the text that stands for `tok` in error messages.
string humanReadableToken(in Token tok)
{
    string text;

    // final switch: the compiler rejects this if a Token member is missing
    final switch (tok)
    {
    case Token.name:
        text = `"name"`;
        break;
    case Token.assign:
        text = `':' or '='`;
        break;
    case Token.str:
        text = `"string"`;
        break;
    case Token.lbrace:
        text = `'{'`;
        break;
    case Token.rbrace:
        text = `'}'`;
        break;
    case Token.lbracket:
        text = `'['`;
        break;
    case Token.rbracket:
        text = `']'`;
        break;
    case Token.semicolon:
        text = `';'`;
        break;
    case Token.comma:
        text = `','`;
        break;
    case Token.unknown:
        text = `"unknown token"`;
        break;
    case Token.eof:
        text = `"end of file"`;
        break;
    }

    return text;
}
195 
/// Tokenizer and recursive descent parser for the config file grammar
/// (a subset of the libconfig grammar).
///
/// The tokenizer keeps one char (`lastChar`) and at most one token (`ahead`)
/// of look-ahead. Every parse error is reported by throwing an `Exception`
/// from `error`.
struct Parser
{
    string filename;    /// only used in error messages
    string content;     /// the complete text being parsed
    int index;          /// offset in `content` of the next char to read
    int lineNum = 1;    /// 1-based line of `lastChar`, used in error messages

    /// One char of look-ahead. Starts as a blank so that the first `getTok`
    /// call fetches the first real char while skipping white space.
    char lastChar = ' ';

    static struct Ahead
    {
        Token tok;
        string s;
    }
    Ahead ahead;        /// token pushed back by `ungetTok`

    /// Whether `ahead` currently holds a pushed-back token. A plain flag
    /// (rather than a pointer to `ahead`) keeps the struct valid when it is
    /// copied or moved.
    bool hasAhead;

    /// Creates a parser over `content`; `filename` only labels error messages.
    this(string content, string filename = null)
    {
        this.filename = filename;
        this.content = content;
    }

    /// Throws an `Exception` carrying `msg`, prefixed with the file name and
    /// the current line number. Never returns normally.
    void error(in string msg)
    {
        import std.conv : to;

        // Built by concatenation so that arbitrarily long file names and
        // messages are reported in full (no fixed-size buffer involved).
        throw new Exception("Error while reading config file: " ~ filename ~
                            "\nline " ~ lineNum.to!string ~ ": " ~ msg);
    }

    /// Returns the next char of `content`, or '\0' once the end is reached.
    char getChar()
    {
        if (index == content.length)
            return '\0';
        // Count a line only when reading moves past its line feed, so that
        // `lineNum` is the line of the char being returned and a token at
        // the end of a line is still reported on that line.
        if (index > 0 && content[index - 1] == '\n')
            ++lineNum;
        return content[index++];
    }

    /// Returns the next token. `outStr` receives the text of name, string
    /// and unknown tokens; it is left empty for all other tokens.
    Token getTok(out string outStr)
    {
        // hand back the pushed-back token first, if there is one
        if (hasAhead)
        {
            hasAhead = false;
            outStr = ahead.s;
            return ahead.tok;
        }

        while (isspace(lastChar))
        {
            lastChar = getChar();
        }

        if (lastChar == '/')
        {
            lastChar = getChar();
            if (lastChar != '/')
            {
                outStr = "/";
                return Token.unknown;
            }

            // single line comment: drop everything up to line feed or EOF,
            // then lex whatever follows
            do
            {
                lastChar = getChar();
            }
            while (lastChar != '\n' && lastChar != '\0');
            return getTok(outStr);
        }

        if (isalpha(lastChar))
        {
            string name;
            do
            {
                name ~= lastChar;
                lastChar = getChar();
            }
            while (isalnum(lastChar) || lastChar == '_' || lastChar == '-');
            outStr = name;
            return Token.name;
        }

        switch (lastChar)
        {
        case ':':
        case '=':
            lastChar = getChar();
            return Token.assign;
        case ';':
            lastChar = getChar();
            return Token.semicolon;
        case ',':
            lastChar = getChar();
            return Token.comma;
        case '{':
            lastChar = getChar();
            return Token.lbrace;
        case '}':
            lastChar = getChar();
            return Token.rbrace;
        case '[':
            lastChar = getChar();
            return Token.lbracket;
        case ']':
            lastChar = getChar();
            return Token.rbracket;
        case '\0':
            // lastChar stays '\0', so every further call yields eof again
            return Token.eof;
        default:
            break;
        }

        if (lastChar == '"')
        {
            outStr = lexQuotedString();
            return Token.str;
        }

        if (lastChar == '`')
        {
            outStr = lexBacktickString();
            return Token.str;
        }

        outStr = [lastChar];
        lastChar = getChar();
        return Token.unknown;
    }

    /// Lexes one or more adjacent double-quoted strings and returns their
    /// concatenation with escape sequences resolved. Expects `lastChar` to
    /// be the opening '"'; leaves `lastChar` on the first non-blank char
    /// after the last closing quote.
    private string lexQuotedString()
    {
        string str;
        while (lastChar == '"')
        {
            while (1)
            {
                lastChar = getChar();
                if (lastChar == '"') break;
                if (lastChar == '\n' || lastChar == '\r')
                {
                    error("Unexpected end of line in string literal");
                }
                else if (lastChar == '\0')
                {
                    error("Unexpected end of file in string literal");
                }
                if (lastChar == '\\')
                {
                    lastChar = getChar();
                    switch (lastChar)
                    {
                    case '\\':
                    case '"':
                        // the escaped char stands for itself
                        break;
                    case 'r':
                        lastChar = '\r';
                        break;
                    case 'n':
                        lastChar = '\n';
                        break;
                    case 't':
                        lastChar = '\t';
                        break;
                    default:
                        error("Unexpected escape sequence: \\" ~ lastChar);
                        break;
                    }
                }
                str ~= lastChar;
            }
            // step over the closing quote and any white space; another '"'
            // right after it continues the same string token
            lastChar = getChar();
            while (isspace(lastChar)) lastChar = getChar();
        }
        return str;
    }

    /// Lexes one or more adjacent backtick-quoted strings and returns their
    /// concatenation, taken verbatim (no escapes, line feeds allowed).
    /// Expects `lastChar` to be the opening '`'; leaves `lastChar` on the
    /// first non-blank char after the last closing backtick.
    private string lexBacktickString()
    {
        string str;
        while (lastChar == '`')
        {
            while (1)
            {
                lastChar = getChar();
                if (lastChar == '`') break;
                if (lastChar == '\0')
                {
                    error("Unexpected end of file in string literal");
                }
                str ~= lastChar;
            }
            lastChar = getChar();
            while (isspace(lastChar)) lastChar = getChar();
        }
        return str;
    }

    /// Pushes one token back so that the next `getTok` call returns it
    /// again. Only a single token can be pending at a time.
    void ungetTok(in Token tok, in string s)
    {
        assert(!hasAhead, "can only have one look ahead");
        ahead.tok = tok;
        ahead.s = s;
        hasAhead = true;
    }

    /// Reports, via `error`, that `tok` (with text `s`) was found where
    /// `expected` was required.
    void unexpectedTokenError(in Token tok, in Token expected, string s)
    {
        s = s.length ? " ("~s~")" : "";
        error("Was expecting token " ~ humanReadableToken(expected) ~
              ". Got " ~ humanReadableToken(tok) ~ s ~ " instead.");
    }

    /// Consumes the next token, which must be `expected` (an error is
    /// raised otherwise), and returns its text.
    string accept(in Token expected)
    {
        string s;
        immutable tok = getTok(s);
        if (tok != expected)
        {
            unexpectedTokenError(tok, expected, s);
        }
        return s;
    }

    /// Parses settings until the end of the input and returns them in
    /// source order.
    Setting[] parseConfig()
    {
        Setting[] res;
        while (1)
        {
            {
                // peek: stop at EOF, otherwise give the token back
                string s;
                auto t = getTok(s);
                if (t == Token.eof)
                {
                    break;
                }
                ungetTok(t, s);
            }
            res ~= parseSetting();
        }
        return res;
    }

    /// Parses one setting: a name or string, an assignment sign, a value
    /// and an optional trailing ';' or ','.
    Setting parseSetting()
    {
        string name;
        auto t = getTok(name);
        if (t != Token.name && t != Token.str)
        {
            unexpectedTokenError(t, Token.name, name);
            assert(false);
        }

        accept(Token.assign);

        Setting res = parseValue(name);

        // the terminator is optional: anything else belongs to what follows
        string s;
        t = getTok(s);
        if (t != Token.semicolon && t != Token.comma)
        {
            ungetTok(t, s);
        }

        return res;
    }

    /// Parses a value (string, array or group) and wraps it in the matching
    /// `Setting` subclass carrying `name`.
    Setting parseValue(string name)
    {
        string s;
        auto t = getTok(s);
        if (t == Token.str)
        {
            return new ScalarSetting(name, s);
        }
        else if (t == Token.lbracket)
        {
            string[] arrVal;
            while (1)
            {
                // get string or rbracket
                t = getTok(s);
                switch (t)
                {
                case Token.str:
                    arrVal ~= s;
                    break;
                case Token.rbracket:
                    return new ArraySetting(name, arrVal);
                default:
                    unexpectedTokenError(t, Token.str, s);
                    assert(false);
                }

                // get comma or rbracket
                t = getTok(s);
                switch (t)
                {
                case Token.comma:
                    break;
                case Token.rbracket:
                    return new ArraySetting(name, arrVal);
                default:
                    unexpectedTokenError(t, Token.comma, s);
                    assert(false);
                }
            }
        }
        else if (t == Token.lbrace)
        {
            Setting[] grpVal;
            while (1)
            {
                t = getTok(s);
                if (t == Token.rbrace)
                {
                    return new GroupSetting(name, grpVal);
                }
                ungetTok(t, s);
                grpVal ~= parseSetting();
            }
        }
        error("Was expecting value.");
        assert(false);
    }
}

unittest
{
    // An error message longer than any fixed-size buffer must come through
    // in full, and must name the line that holds the offending token.
    string longName;
    foreach (i; 0 .. 2000)
        longName ~= 'x';

    bool thrown;
    try
    {
        Parser("a = \"x\";\nfoo bar\n", longName).parseConfig();
    }
    catch (Exception e)
    {
        thrown = true;
        assert(e.msg == "Error while reading config file: " ~ longName ~
               "\nline 2: Was expecting token ':' or '='. Got \"name\" (bar) instead.");
    }
    assert(thrown);
}
518 
unittest
{
    // Scalar values: each input must parse to a scalar setting holding
    // exactly the expected string.
    static struct Case
    {
        string input;
        string expected;
    }

    const Case[] cases = [
        Case(`""`, ""),
        Case(`"abc\r\ndef\t\"quoted/\\123\""`,
             "abc\r\ndef\t\"quoted/\\123\""),
        Case(`"concatenated" " multiline"
                " strings"`, "concatenated multiline strings"),
        Case("`abc\n\\ //comment \"`",
             "abc\n\\ //comment \""),
        Case(`"Üņïčöđë"`, "Üņïčöđë"),
    ];

    foreach (c; cases)
    {
        auto parsed = Parser(c.input).parseValue(null);
        assert(parsed.type == Setting.Type.scalar);
        auto scalar = cast(ScalarSetting) parsed;
        assert(scalar.val == c.expected);
    }
}
537 
unittest
{
    // Array values: each input must parse to an array setting holding
    // exactly the expected strings.
    static struct Case
    {
        string input;
        string[] expected;
    }

    Case[] cases = [
        Case(`[]`, []),
        Case(`[ "a" ]`, [ "a" ]),
        Case(`[ "a", ]`, [ "a" ]),
        Case(`[ "a", "b" ]`, [ "a", "b" ]),
        Case(`[
            // comment
            "a",
            // comment
            "b"
        ]`, [ "a", "b" ]),
    ];

    foreach (c; cases)
    {
        auto parsed = Parser(c.input).parseValue(null);
        assert(parsed.type == Setting.Type.array);
        auto array = cast(ArraySetting) parsed;
        assert(array.vals == c.expected);
    }
}
558 
unittest
{
    // A complete config: comments, an empty group with a bare name, and a
    // group with a quoted name holding a scalar and an array.
    enum configText =
`// comment

// comment
group-1_2: {};
// comment
"86(_64)?-.*linux\\.?":
{
    // comment
    scalar = "abc";
    // comment
    Array_1-2 = [ "a" ];
};
`;

    auto topLevel = Parser(configText).parseConfig();
    assert(topLevel.length == 2);

    // first entry: the empty group
    auto first = topLevel[0];
    assert(first.name == "group-1_2");
    assert(first.type == Setting.Type.group);
    assert((cast(GroupSetting) first).children == []);

    // second entry: the group with the quoted name
    auto second = topLevel[1];
    assert(second.name == "86(_64)?-.*linux\\.?");
    assert(second.type == Setting.Type.group);
    auto members = (cast(GroupSetting) second).children;
    assert(members.length == 2);

    auto scalarMember = members[0];
    assert(scalarMember.name == "scalar");
    assert(scalarMember.type == Setting.Type.scalar);
    assert((cast(ScalarSetting) scalarMember).val == "abc");

    auto arrayMember = members[1];
    assert(arrayMember.name == "Array_1-2");
    assert(arrayMember.type == Setting.Type.array);
    assert((cast(ArraySetting) arrayMember).vals == [ "a" ]);
}