module workspaced.dparseext;

import std.algorithm;
import std.array;
import std.experimental.logger;
import std.string;

import dparse.ast;
import dparse.lexer;
import dparse.parser;
import dparse.rollback_allocator;
import dsymbol.builtin.names;

/// Joins an `IdentifierOrTemplateChain` into a dot-separated string such as
/// `foo.bar.baz`. Only the identifier text of each entry is used; template
/// instance arguments are not rendered.
string makeString(in IdentifierOrTemplateChain c)
{
	return c.identifiersOrTemplateInstances.map!(a => a.identifier.text).join(".");
}

/// Formats any libdparse AST node back into D source code using
/// `dparse.formatter.Formatter`. Extra `args` are forwarded to
/// `Formatter.format`.
/// Returns: the formatted source, or `null` if `ast` is null.
string astToString(T, Args...)(in T ast, Args args)
{
	import dparse.formatter : Formatter;

	if (!ast)
		return null;

	auto app = appender!string();
	auto formatter = new Formatter!(typeof(app))(app);
	formatter.format(ast, args);
	return app.data;
}

/// Formats the parameter list of a declaration as D source code:
/// the runtime parameter list for `FunctionDeclaration` / `Constructor`,
/// the template parameter list for `TemplateDeclaration`.
/// Returns: the formatted parameters, or an empty string for any other
/// declaration type (no `static if` branch matches).
string paramsToString(Dec)(const Dec dec)
{
	import dparse.formatter : Formatter;

	auto app = appender!string();
	auto formatter = new Formatter!(typeof(app))(app);

	static if (is(Dec == FunctionDeclaration) || is(Dec == Constructor))
	{
		formatter.format(dec.parameters);
	}
	else static if (is(Dec == TemplateDeclaration))
	{
		formatter.format(dec.templateParameters);
	}

	return app.data;
}

/// Other tokens — token types whose spelling is not fixed by the type alone,
/// so their text must be read from `Token.text` rather than `str(type)`.
private enum dynamicTokens = [
	"specialTokenSequence", "comment", "identifier", "scriptLine",
	"whitespace", "doubleLiteral", "floatLiteral", "idoubleLiteral",
	"ifloatLiteral", "intLiteral", "longLiteral", "realLiteral",
	"irealLiteral", "uintLiteral", "ulongLiteral", "characterLiteral",
	"dstringLiteral", "stringLiteral", "wstringLiteral"
];

/// Returns the source text of a token: `token.text` for tokens with dynamic
/// content (identifiers, literals, comments, ...), otherwise the fixed
/// spelling of the token type (keywords, operators, punctuation).
string tokenText(const Token token)
{
	switch (token.type)
	{
		// expand one `case` per dynamic token type, all falling through to
		// the shared `return token.text;` below
		static foreach (T; dynamicTokens)
		{
	case tok!T:
		}
		return token.text;
	default:
		return str(token.type);
	}
}

/// Returns the length in bytes of the token's source text.
size_t textLength(const Token token)
{
	return token.tokenText.length;
}

/// Returns true if the token is a character literal or any string literal.
/// (Note: the `IdType` overload further below does NOT include
/// `characterLiteral`.)
bool isSomeString(const Token token)
{
	switch (token.type)
	{
	case tok!"characterLiteral":
	case tok!"dstringLiteral":
	case tok!"stringLiteral":
	case tok!"wstringLiteral":
		return true;
	default:
		return false;
	}
}

/// Returns true if the token's source text starts with an identifier
/// character. This matches identifiers as well as keywords.
bool isLikeIdentifier(const Token token)
{
	import workspaced.helpers;

	auto text = token.tokenText;
	return text.length && text[0].isIdentifierChar;
}

/// Performs a binary search to find the token containing the search location.
/// Params:
///   tokens = the token array to search in.
///   bytes = the byte index the token should be in.
/// Returns: the index of the token inside the given tokens array which
/// contains the character specified at the given byte. This will be the first
/// token that is `tok.index == bytes` or before the next token that is too far.
/// If no tokens match, this will return `tokens.length`.
///
/// This is equivalent to the following code:
/// ---
/// foreach (i, tok; tokens)
/// {
///     if (tok.index == bytes)
///         return i;
///     else if (tok.index > bytes)
///         return i - 1;
/// }
/// return tokens.length;
/// ---
size_t tokenIndexAtByteIndex(scope const(Token)[] tokens, size_t bytes)
out (v; v <= tokens.length)
{
	// empty input or a location at/before the first token maps to index 0
	// (unlike the reference loop above, which would wrap around via `i - 1`)
	if (!tokens.length || tokens[0].index >= bytes)
		return 0;

	// find where to start using binary search
	size_t l = 0;
	size_t r = tokens.length - 1;
	while (l < r)
	{
		size_t m = (l + r) / 2;
		if (tokens[m].index < bytes)
			l = m + 1;
		else
			r = m - 1;
	}
	// NOTE: the `r = m - 1` step may discard the answer candidate, so `r`
	// can undershoot the target index — but it never overshoots it (`l` only
	// ever advances past tokens strictly before `bytes`), so the forward
	// linear scan below always terminates on the correct token.
	// `m - 1` cannot underflow: the early return above guarantees
	// `tokens[0].index < bytes`, so `m == 0` always takes the `l = m + 1`
	// branch.
	size_t start = r;

	// search remaining with linear search
	foreach (i, tok; tokens[start .. $])
	{
		if (tok.index == bytes)
			return start + i;
		else if (tok.index > bytes)
			return start + i - 1;
	}
	return tokens.length;
}

/// ditto
size_t tokenIndexAtPosition(scope const(Token)[] tokens, uint line, uint column)
out (v; v <= tokens.length)
{
	// three-way comparison of a token's start against the searched position:
	// negative = token starts before it, 0 = exact match, positive = after it
	int cmp(Token token)
	{
		if (token.line != line)
			return token.line < line ? -1 : 1;
		else if (token.column != column)
			return token.column < column ? -1 : 1;
		else
			return 0;
	}

	// empty input or a position at/before the first token maps to index 0
	if (!tokens.length || cmp(tokens[0]) >= 0)
		return 0;

	// find where to start using binary search
	size_t l = 0;
	size_t r = tokens.length - 1;
	while (l < r)
	{
		size_t m = (l + r) / 2;
		if (cmp(tokens[m]) < 0)
			l = m + 1;
		else
			r = m - 1;
	}
	// same invariant as in tokenIndexAtByteIndex: `r` may undershoot the
	// answer but never overshoots it, so the forward scan stays correct
	size_t start = r;

	// search remaining with linear search
	foreach (i, tok; tokens[start .. $])
	{
		if (cmp(tok) == 0)
			return start + i;
		else if (cmp(tok) > 0)
			return start + i - 1;
	}
	return tokens.length;
}

///
unittest
{
	StringCache stringCache = StringCache(StringCache.defaultBucketCount);
	const(Token)[] tokens = getTokensForParser(cast(ubyte[]) `module foo.bar;

// ok
void main(string[] args)
{
}

/// documentation
void foo()
{
}
`, LexerConfig.init, &stringCache);

	auto get(size_t bytes)
	{
		auto i = tokens.tokenIndexAtByteIndex(bytes);
		if (i == tokens.length)
			return tok!"__EOF__";
		return tokens[i].type;
	}

	assert(get(0) == tok!"module");
	assert(get(4) == tok!"module");
	assert(get(6) == tok!"module");
	assert(get(7) == tok!"identifier");
	assert(get(9) == tok!"identifier");
	assert(get(10) == tok!".");
	assert(get(11) == tok!"identifier");
	assert(get(16) == tok!";");
	assert(get(49) == tok!"{");
	assert(get(48) == tok!"{");
	assert(get(47) == tok!")");
	assert(get(1000) == tok!"__EOF__");

	// TODO: process trivia fields in libdparse >=0.15.0 when it releases
	//assert(get(20) == tok!"comment");
	assert(get(20) == tok!";");

	// assert(get(57) == tok!"comment");
}

/// Returns true if the type is any string literal token type. (Does not
/// include `characterLiteral`, unlike the `Token` overload above.)
bool isSomeString(const IdType type)
{
	switch (type)
	{
	case tok!"stringLiteral":
	case tok!"wstringLiteral":
	case tok!"dstringLiteral":
		return true;
	default:
		return false;
	}
}

/// Tries to evaluate an expression if it evaluates to a string.
/// Returns: `null` if the resulting value is not a string or could not be
/// evaluated.
string evaluateExpressionString(const PrimaryExpression expr)
in (expr !is null)
{
	return evaluateExpressionString(expr.primary);
}

/// ditto
string evaluateExpressionString(const UnaryExpression expr)
in (expr !is null)
{
	// only bare primary expressions are evaluated (no operators, casts, ...)
	if (expr.primaryExpression)
		return evaluateExpressionString(expr.primaryExpression);
	else
		return null;
}

/// ditto
string evaluateExpressionString(const ExpressionNode expr)
in (expr !is null)
{
	// maybe we want to support simple concatenation here some time

	if (auto unary = cast(UnaryExpression) expr)
		return evaluateExpressionString(unary);
	else
		return null;
}

/// ditto
string evaluateExpressionString(const Token token)
{
	import dparse.strings : unescapeString, isStringLiteral;

	switch (token.type)
	{
	case tok!"stringLiteral":
	case tok!"wstringLiteral":
	case tok!"dstringLiteral":
		auto str = token.text;

		// we want to unquote here
		// foreach because implicit concatenation can combine multiple strings
		auto ret = appender!string;
		scope StringCache cache = StringCache(16);
		LexerConfig config;
		config.commentBehavior = CommentBehavior.noIntern;
		config.stringBehavior = StringBehavior.source;
		config.whitespaceBehavior = WhitespaceBehavior.skip;
		config.fileName = "evaluate-string-stdin";
		foreach (t; DLexer(str, config, &cache))
		{
			switch (t.type)
			{
			case tok!"stringLiteral":
			case tok!"wstringLiteral":
			case tok!"dstringLiteral":
				if (t.text.isStringLiteral)
				{
					ret ~= unescapeString(t.text);
				}
				else
				{
					// in debug builds this is a hard error so the bad token
					// surfaces during development; release builds log and
					// fall back to the raw input
					debug
					{
						throw new Exception("Invalid stringLiteral in stringLiteral token: `" ~ t.text ~ '`');
					}
					else
					{
						warningf("Invalid stringLiteral in stringLiteral token: `%s`", t.text);
						return str;
					}
				}
				break;
			default:
				// unexpected token, return input because it might already be
				// unescaped
				return str;
			}
		}

		return ret.data;
	default:
		return null;
	}
}

/// Finds the deepest non-null node of any BaseNode. (like visiting the tree)
/// Aborts on types that contain `DeclarationOrStatement` or `Declaration[]`
/// fields.
/// Useful for getting the IfStatement out of a DeclarationOrStatement without
/// traversing its children.
BaseNode findDeepestNonBlockNode(T : BaseNode)(T ast)
{
	static assert(!is(T == BaseNode), "Passed in a BaseNode, that's probably not what you wanted to do (pass in the most specific type you have)");
	// stop descending once this node type owns a statement/declaration block
	bool nonProcess = false;
	foreach (member; ast.tupleof)
	{
		static if (is(typeof(member) : DeclarationOrStatement)
			|| is(typeof(member) : Declaration[]))
		{
			nonProcess = true;
		}
	}

	if (nonProcess)
		return ast;

	// recurse into the first non-null AST child; leaf nodes return themselves
	foreach (member; ast.tupleof)
	{
		static if (is(typeof(member) : BaseNode))
		{
			if (member !is null)
			{
				return findDeepestNonBlockNode(member);
			}
		}
	}
	return ast;
}

/// Gets the final `else` block of an if. Will return a node of type
/// `IfStatement` if it's an `else if` block. Returns null if there is no single
/// else statement.
BaseNode getIfElse(IfStatement ifStmt)
{
	// no else at all on the outermost if -> nothing to return
	if (!ifStmt.elseStatement)
		return null;

	while (true)
	{
		auto elseStmt = ifStmt.elseStatement;
		// an `else if` chain that ends without a final `else`: return the
		// last `else if` node itself (an IfStatement)
		if (!elseStmt)
			return ifStmt;

		auto stmtInElse = elseStmt.findDeepestNonBlockNode;
		assert(stmtInElse !is elseStmt);

		// keep following `else if` links; anything else is the final block
		if (cast(IfStatement)stmtInElse)
			ifStmt = cast(IfStatement)stmtInElse;
		else
			return stmtInElse;
	}
}

unittest
{
	StringCache stringCache = StringCache(StringCache.defaultBucketCount);
	RollbackAllocator rba;
	IfStatement parseIfStmt(string code)
	{
		const(Token)[] tokens = getTokensForParser(cast(ubyte[])code, LexerConfig.init, &stringCache);
		auto parser = new Parser();
		parser.tokens = tokens;
		parser.allocator = &rba;
		return parser.parseIfStatement();
	}

	alias p = parseIfStmt;
	assert(getIfElse(p("if (x) {}")) is null);
	assert(getIfElse(p("if (x) {} else if (y) {}")) !is null);
	assert(cast(IfStatement)getIfElse(p("if (x) {} else if (y) {}")) !is null, typeid(getIfElse(p("if (x) {} else if (y) {}"))).name);
	assert(getIfElse(p("if (x) {} else if (y) {} else {}")) !is null);
	assert(cast(IfStatement)getIfElse(p("if (x) {} else if (y) {} else {}")) is null);
}

/// Returns the slice of `s` described by `range` (`[start, end)`), with both
/// bounds clamped into the valid range of `s`.
C[] substr(C)(C[] s, size_t[2] range)
{
	return substr(s, range[0], range[1]);
}

/// Returns `s[start .. end]` with both indices clamped to the bounds of `s`:
/// `start` is clamped to the last valid index, `end` to `s.length`, and an
/// empty slice (at `start`) is returned when `end < start` after clamping.
/// Note: `start` is `size_t` (unsigned), so a negative-start check is
/// impossible by construction; the former `if (start < 0)` guard was dead
/// code and has been removed.
C[] substr(C)(C[] s, size_t start, size_t end)
{
	if (!s.length)
		return s;
	if (start >= s.length)
		start = s.length - 1; // @suppress(dscanner.suspicious.length_subtraction)
	if (end > s.length)
		end = s.length;
	if (end < start)
		return s[start .. start];
	return s[start .. end];
}