// taken from https://github.com/ldc-developers/ldc/blob/829dc71114eaf7c769208f03eb9a614dafd789c3/driver/config.d
// License: three-clause BSD (https://github.com/ldc-developers/ldc/blob/master/LICENSE)
// Authors: Martin Kinkelin (@kinke), Remi Thebault (@rtbo)

//===-- driver/config.d - LDC config file parsing -----------------*- D -*-===//
//
//                         LDC – the LLVM D compiler
//
// This file is distributed under the BSD-style LDC license. See the LICENSE
// file for details.
//
//===----------------------------------------------------------------------===//
//
// Parsing engine for the LDC config file (ldc2.conf).
//
//===----------------------------------------------------------------------===//
module external.ldc.config;

import core.stdc.ctype;
import core.stdc.stdio;
import core.stdc.string;


/// Base class of every parsed configuration entry: a name plus the kind of
/// value attached to it.
class Setting
{
    /// Kind of value a setting carries.
    enum Type
    {
        scalar,
        array,
        group,
    }

    this(string name, Type type)
    {
        _name = name;
        _type = type;
    }

    /// Name found on the left-hand side of the assignment.
    @property string name() const
    {
        return _name;
    }

    /// Tells which concrete subclass this setting is.
    @property Type type() const
    {
        return _type;
    }

    private string _name;
    private Type _type;
}


/// Setting holding a single string value.
class ScalarSetting : Setting
{
    this(string name, string val)
    {
        super(name, Type.scalar);
        _val = val;
    }

    /// The string value.
    @property string val() const
    {
        return _val;
    }

    private string _val;
}


/// Setting holding a list of strings.
class ArraySetting : Setting
{
    this(string name, string[] vals)
    {
        super(name, Type.array);
        _vals = vals;
    }

    /// The string values, in source order.
    @property const(string)[] vals() const
    {
        return _vals;
    }

    private string[] _vals;
}

/// Setting holding nested settings.
class GroupSetting : Setting
{
    this(string name, Setting[] children)
    {
        super(name, Type.group);
        _children = children;
    }

    /// The nested settings, in source order.
    @property const(Setting)[] children() const
    {
        return _children;
    }

    private Setting[] _children;
}


/**
 * Reads and parses a config file.
 *
 * Params:
 *   filename = path of the file to read
 *
 * Returns: the top-level settings, in source order.
 *
 * Throws: Exception on a syntax error (the message contains the file name
 *         and the line number); whatever `std.file.readText` throws when the
 *         file cannot be read.
 */
Setting[] parseConfigFile(string filename)
{
    import std.file : readText;

    auto content = readText(filename);

    // skip UTF-8 BOM
    if (content.length >= 3 && content[0 .. 3] == "\xEF\xBB\xBF")
        content = content[3 .. $];

    auto parser = Parser(content, filename);
    return parser.parseConfig();
}


private:

/+

What follows is a recursive descent parser that reads the following
EBNF grammar.
It is a subset of the libconfig grammar (http://www.hyperrealm.com/libconfig).

config  =   { ows , setting } , ows ;
setting =   (name | string) , (":" | "=") , value , [";" | ","] ;
name    =   alpha , { alpha | digit | "_" | "-" } ;
value   =   string | array | group ;
array   =   "[" , ows ,
                [ string , ows , { "," , ows , string , ows } , [ "," , ows ] ] ,
            "]" ;
group   =   "{" , ows , { setting , ows } , "}" ;
string  =   ( quotstr , { ows , quotstr } ) |
            ( btstr , { ows, btstr } ) ;
quotstr =   '"' , { ? any char but '"', '\n' and '\r' ? | escseq } , '"' ;
escseq  =   "\" , ( "\" | '"' | "r" | "n" | "t" ) ;
btstr   =   '`' , { ? any char but '`' ? } , '`' ;
alpha   =   ? any char between "a" and "z" included
                    or between "A" and "Z" included ? ;
digit   =   ? any char between "0" and "9" included ? ;
ows     =   [ ws ] ; (* optional white space *)
ws      =   ? white space (space, tab, line feed ...) ? ;


Single line comments are also supported. They start with "//" and span until
line feed.
The "//" sequence is however allowed within strings and doesn't need to be
escaped.
White space is significant only within strings.
Physical line feeds are not allowed within double-quoted strings. To span a
string over multiple lines, use concatenation
("hello " "world" == "hello world").
The following escape sequences are allowed in double-quoted strings:
  - \\
  - \"
  - \r
  - \n
  - \t

+/

/// Lexical tokens produced by `Parser.getTok`.
enum Token
{
    name,
    assign,         // ':' or '='
    str,
    lbrace,         // '{'
    rbrace,         // '}'
    lbracket,       // '['
    rbracket,       // ']'
    semicolon,      // ';'
    comma,          // ','
    unknown,
    eof,
}

/// Returns the textual description of `tok` used in error messages.
string humanReadableToken(in Token tok)
{
    final switch(tok)
    {
    case Token.name:        return `"name"`;
    case Token.assign:      return `':' or '='`;
    case Token.str:         return `"string"`;
    case Token.lbrace:      return `'{'`;
    case Token.rbrace:      return `'}'`;
    case Token.lbracket:    return `'['`;
    case Token.rbracket:    return `']'`;
    case Token.semicolon:   return `';'`;
    case Token.comma:       return `','`;
    case Token.unknown:     return `"unknown token"`;
    case Token.eof:         return `"end of file"`;
    }
}

/// Tokenizer and recursive descent parser over an in-memory config text.
struct Parser
{
    string filename;
    string content;
    size_t index;       // offset of the next unread byte of `content`
    int lineNum = 1;    // 1-based line of the read position, for diagnostics

    // Most recently read, not yet consumed character. Starts as a blank so
    // that the first getTok call begins by reading from `content`.
    char lastChar = ' ';

    static struct Ahead
    {
        Token tok;
        string s;
    }
    Ahead ahead;        // token pushed back by ungetTok
    // Whether `ahead` currently holds a pushed-back token. A flag is used
    // rather than a pointer to `ahead`, because a pointer into the struct
    // itself would dangle as soon as the struct gets copied or moved.
    bool hasAhead;

    this(string content, string filename = null)
    {
        this.filename = filename;
        this.content = content;
    }

    /// Throws an Exception made of the file name, the current line number
    /// and `msg`. Never returns normally.
    void error(in string msg)
    {
        import std.conv : to;

        // Built by concatenation rather than through a fixed-size snprintf
        // buffer, so that diagnostics of any length are reported in full.
        throw new Exception("Error while reading config file: " ~ filename ~
                            "\nline " ~ lineNum.to!string ~ ": " ~ msg);
    }

    /// Returns the next byte of the content and advances, or '\0' once the
    /// content is exhausted. Counts line feeds to keep `lineNum` current.
    char getChar()
    {
        if (index >= content.length)
            return '\0';
        const c = content[index++];
        if (c == '\n')
            ++lineNum;
        return c;
    }

    /**
     * Returns the next token, taking the pushed-back one first if any.
     *
     * `outStr` receives the text of name and string tokens (escape sequences
     * decoded, adjacent literals concatenated) and the offending text of
     * unknown tokens; it is left empty for the other tokens.
     *
     * Throws: Exception on an unterminated string literal or an unsupported
     *         escape sequence.
     */
    Token getTok(out string outStr)
    {
        if (hasAhead)
        {
            hasAhead = false;
            outStr = ahead.s;
            return ahead.tok;
        }

        // Skip white space and any number of consecutive `//` comments.
        while (true)
        {
            while (isspace(lastChar))
            {
                lastChar = getChar();
            }

            if (lastChar != '/')
                break;

            lastChar = getChar();
            if (lastChar != '/')
            {
                // a lone '/' is not part of the grammar
                outStr = "/";
                return Token.unknown;
            }

            do
            {
                lastChar = getChar();
            }
            while (lastChar != '\n' && lastChar != '\0');
        }

        if (isalpha(lastChar))
        {
            string name;
            do
            {
                name ~= lastChar;
                lastChar = getChar();
            }
            while (isalnum(lastChar) || lastChar == '_' || lastChar == '-');
            outStr = name;
            return Token.name;
        }

        switch (lastChar)
        {
        case ':':
        case '=':
            lastChar = getChar();
            return Token.assign;
        case ';':
            lastChar = getChar();
            return Token.semicolon;
        case ',':
            lastChar = getChar();
            return Token.comma;
        case '{':
            lastChar = getChar();
            return Token.lbrace;
        case '}':
            lastChar = getChar();
            return Token.rbrace;
        case '[':
            lastChar = getChar();
            return Token.lbracket;
        case ']':
            lastChar = getChar();
            return Token.rbracket;
        case '\0':
            // lastChar is deliberately left untouched: every further call
            // keeps returning eof
            return Token.eof;
        default:
            break;
        }

        if (lastChar == '"')
        {
            string str;
            // one iteration per literal; literals separated only by white
            // space are concatenated into a single token
            while (lastChar == '"')
            {
                while (1)
                {
                    lastChar = getChar();
                    if (lastChar == '"') break;
                    if (lastChar == '\n' || lastChar == '\r')
                    {
                        error("Unexpected end of line in string literal");
                    }
                    else if (lastChar == '\0')
                    {
                        error("Unexpected end of file in string literal");
                    }
                    if (lastChar == '\\')
                    {
                        lastChar = getChar();
                        switch(lastChar)
                        {
                        case '\\':
                        case '"':
                            // the escaped character stands for itself
                            break;
                        case 'r':
                            lastChar = '\r';
                            break;
                        case 'n':
                            lastChar = '\n';
                            break;
                        case 't':
                            lastChar = '\t';
                            break;
                        default:
                            error("Unexpected escape sequence: \\" ~ lastChar);
                            break;
                        }
                    }
                    str ~= lastChar;
                }
                lastChar = getChar();
                while (isspace(lastChar)) lastChar = getChar();
            }

            outStr = str;
            return Token.str;
        }

        if (lastChar == '`')
        {
            string str;
            // back-tick literals are taken verbatim (no escapes, line feeds
            // allowed) and concatenated the same way
            while (lastChar == '`')
            {
                while (1)
                {
                    lastChar = getChar();
                    if (lastChar == '`') break;
                    if (lastChar == '\0')
                    {
                        error("Unexpected end of file in string literal");
                    }
                    str ~= lastChar;
                }
                lastChar = getChar();
                while (isspace(lastChar)) lastChar = getChar();
            }

            outStr = str;
            return Token.str;
        }

        outStr = [lastChar];
        lastChar = getChar();
        return Token.unknown;
    }

    /// Pushes one token back; the next getTok call returns it again.
    /// Only a single token can be pending at any time.
    void ungetTok(in Token tok, in string s)
    {
        assert(!hasAhead, "can only have one look ahead");
        ahead.tok = tok;
        ahead.s = s;
        hasAhead = true;
    }

    /// Reports through `error` that `tok` (with text `s`, if any) was read
    /// where `expected` was required.
    void unexpectedTokenError(in Token tok, in Token expected, string s)
    {
        s = s.length ? " ("~s~")" : "";
        error("Was expecting token " ~ humanReadableToken(expected) ~
              ". Got " ~ humanReadableToken(tok) ~ s ~ " instead.");
    }

    /// Reads the next token, requires it to be `expected` and returns its
    /// text. Throws an Exception otherwise.
    string accept(in Token expected)
    {
        string s;
        immutable tok = getTok(s);
        if (tok != expected)
        {
            unexpectedTokenError(tok, expected, s);
        }
        return s;
    }

    /// Parses settings until the end of the content and returns them in
    /// source order.
    Setting[] parseConfig()
    {
        Setting[] res;
        while (1)
        {
            {
                string s;
                auto t = getTok(s);
                if (t == Token.eof)
                {
                    break;
                }
                ungetTok(t, s);
            }
            res ~= parseSetting();
        }
        return res;
    }

    /// Parses `name-or-string (':' | '=') value`, followed by an optional
    /// ';' or ','.
    Setting parseSetting()
    {
        string name;
        auto t = getTok(name);
        if (t != Token.name && t != Token.str)
        {
            unexpectedTokenError(t, Token.name, name);
            assert(false);
        }

        accept(Token.assign);

        Setting res = parseValue(name);

        // the terminator is optional: anything else belongs to what follows
        string s;
        t = getTok(s);
        if (t != Token.semicolon && t != Token.comma)
        {
            ungetTok(t, s);
        }

        return res;
    }

    /// Parses a string, an array or a group, and wraps it into the matching
    /// Setting subclass carrying `name`.
    Setting parseValue(string name)
    {
        string s;
        auto t = getTok(s);
        if (t == Token.str)
        {
            return new ScalarSetting(name, s);
        }
        else if (t == Token.lbracket)
        {
            string[] arrVal;
            while (1)
            {
                // get string or rbracket
                t = getTok(s);
                switch(t)
                {
                case Token.str:
                    arrVal ~= s;
                    break;
                case Token.rbracket:
                    return new ArraySetting(name, arrVal);
                default:
                    unexpectedTokenError(t, Token.str, s);
                    assert(false);
                }

                // get comma or rbracket
                t = getTok(s);
                switch(t)
                {
                case Token.comma:
                    break;
                case Token.rbracket:
                    return new ArraySetting(name, arrVal);
                default:
                    unexpectedTokenError(t, Token.comma, s);
                    assert(false);
                }
            }
        }
        else if (t == Token.lbrace)
        {
            Setting[] grpVal;
            while (1)
            {
                t = getTok(s);
                if (t == Token.rbrace)
                {
                    return new GroupSetting(name, grpVal);
                }
                ungetTok(t, s);
                grpVal ~= parseSetting();
            }
        }
        error("Was expecting value.");
        assert(false);
    }
}

// Scalar values: escape sequences, literal concatenation, back-tick strings
// and non-ASCII content.
unittest
{
    static struct ScalarCase
    {
        string input;
        string expected;
    }

    const ScalarCase[] cases = [
        ScalarCase(`""`, ""),
        ScalarCase(`"abc\r\ndef\t\"quoted/\\123\""`,
                   "abc\r\ndef\t\"quoted/\\123\""),
        ScalarCase(`"concatenated" " multiline"
                   " strings"`, "concatenated multiline strings"),
        ScalarCase("`abc\n\\ //comment \"`",
                   "abc\n\\ //comment \""),
        ScalarCase(`"Üņïčöđë"`, "Üņïčöđë"),
    ];

    foreach (c; cases)
    {
        auto parsed = Parser(c.input).parseValue(null);
        assert(parsed.type == Setting.Type.scalar);
        auto scalar = cast(ScalarSetting) parsed;
        assert(scalar.val == c.expected);
    }
}

// Array values: empty array, trailing comma and interleaved comments.
unittest
{
    static struct ArrayCase
    {
        string input;
        string[] expected;
    }

    const ArrayCase[] cases = [
        ArrayCase(`[]`, []),
        ArrayCase(`[ "a" ]`, [ "a" ]),
        ArrayCase(`[ "a", ]`, [ "a" ]),
        ArrayCase(`[ "a", "b" ]`, [ "a", "b" ]),
        ArrayCase(`[
            // comment
            "a",
            // comment
            "b"
        ]`, [ "a", "b" ]),
    ];

    foreach (c; cases)
    {
        auto parsed = Parser(c.input).parseValue(null);
        assert(parsed.type == Setting.Type.array);
        auto array = cast(ArraySetting) parsed;
        assert(array.vals == c.expected);
    }
}

// A whole config: comments, an empty group, and a group named by a string.
unittest
{
    enum source =
`// comment

// comment
group-1_2: {};
// comment
"86(_64)?-.*linux\\.?":
{
    // comment
    scalar = "abc";
    // comment
    Array_1-2 = [ "a" ];
};
`;

    auto parsed = Parser(source).parseConfig();
    assert(parsed.length == 2);

    // first top-level entry: the empty group
    auto first = parsed[0];
    assert(first.name == "group-1_2");
    assert(first.type == Setting.Type.group);
    assert((cast(GroupSetting) first).children == []);

    // second top-level entry: the group whose name is a string literal
    auto second = parsed[1];
    assert(second.name == "86(_64)?-.*linux\\.?");
    assert(second.type == Setting.Type.group);
    auto members = (cast(GroupSetting) second).children;
    assert(members.length == 2);

    assert(members[0].name == "scalar");
    assert(members[0].type == Setting.Type.scalar);
    assert((cast(ScalarSetting) members[0]).val == "abc");

    assert(members[1].name == "Array_1-2");
    assert(members[1].type == Setting.Type.array);
    assert((cast(ArraySetting) members[1]).vals == [ "a" ]);
}