1 module workspaced.dparseext;
2 
3 import std.algorithm;
4 import std.array;
5 import std.experimental.logger;
6 import std.string;
7 
8 import dparse.ast;
9 import dparse.lexer;
10 import dparse.parser;
11 import dparse.rollback_allocator;
12 import dsymbol.builtin.names;
13 
/// Joins the identifier parts of an `IdentifierOrTemplateChain` with dots,
/// e.g. yielding `"foo.bar.baz"` for the chain `foo.bar.baz`.
string makeString(in IdentifierOrTemplateChain c)
{
	auto parts = c.identifiersOrTemplateInstances
		.map!(instance => instance.identifier.text);
	return parts.join(".");
}
18 
/// Formats any libdparse AST node (plus optional extra format arguments)
/// back into D source text using `dparse.formatter`.
/// Returns: the formatted source, or `null` when `ast` is null.
string astToString(T, Args...)(in T ast, Args args)
{
	import dparse.formatter : Formatter;

	if (!ast)
		return null;

	auto sink = appender!string();
	auto fmt = new Formatter!(typeof(sink))(sink);
	fmt.format(ast, args);
	return sink.data;
}
31 
/// Formats the parameter list of a function/constructor declaration, or the
/// template parameter list of a template declaration, to source text.
/// Returns: an empty string for any other declaration type.
string paramsToString(Dec)(const Dec dec)
{
	import dparse.formatter : Formatter;

	auto sink = appender!string();
	auto fmt = new Formatter!(typeof(sink))(sink);

	static if (is(Dec == FunctionDeclaration) || is(Dec == Constructor))
		fmt.format(dec.parameters);
	else static if (is(Dec == TemplateDeclaration))
		fmt.format(dec.templateParameters);

	return sink.data;
}
50 
/// Token kinds whose source text varies and is therefore carried in
/// `Token.text` (identifiers, literals, comments, whitespace, ...), as
/// opposed to fixed-spelling tokens like keywords and operators.
/// Used by `tokenText` to decide where to read a token's text from.
private enum dynamicTokens = [
		"specialTokenSequence", "comment", "identifier", "scriptLine",
		"whitespace", "doubleLiteral", "floatLiteral", "idoubleLiteral",
		"ifloatLiteral", "intLiteral", "longLiteral", "realLiteral",
		"irealLiteral", "uintLiteral", "ulongLiteral", "characterLiteral",
		"dstringLiteral", "stringLiteral", "wstringLiteral"
	];
59 
/// Returns the source text of a token: the `text` payload for token kinds
/// with variable content (see `dynamicTokens`), or the fixed spelling via
/// `str` for everything else (keywords, operators), whose `text` is empty.
string tokenText(const Token token)
{
	switch (token.type)
	{
		// expands to one `case tok!"...":` label per dynamic token kind;
		// all of them fall through to the shared return below
		static foreach (T; dynamicTokens)
		{
	case tok!T:
		}
		return token.text;
	default:
		return str(token.type);
	}
}
73 
/// Returns: the length in bytes of the token's textual representation.
size_t textLength(const Token token)
{
	return tokenText(token).length;
}
78 
/// Checks if the token is any kind of string or character literal.
/// NOTE(review): unlike the `IdType` overload further down, this one also
/// accepts `characterLiteral` — confirm whether that difference is intended.
bool isSomeString(const Token token)
{
	return token.type.among(
		tok!"characterLiteral",
		tok!"dstringLiteral",
		tok!"stringLiteral",
		tok!"wstringLiteral") != 0;
}
92 
/// Checks if the token's source text begins with a valid identifier
/// character (so keywords and identifiers both qualify).
bool isLikeIdentifier(const Token token)
{
	import workspaced.helpers;

	const repr = token.tokenText;
	return repr.length > 0 && repr[0].isIdentifierChar;
}
100 
/// Performs a binary search to find the token containing the search location.
/// Params:
///   tokens = the token array to search in.
///   bytes  = the byte index the token should be in.
/// Returns: the index of the token inside the given tokens array which
/// contains the character specified at the given byte. This will be the first
/// token that is `tok.index == bytes` or before the next token that is too far.
/// If no tokens match, this will return `tokens.length`.
///
/// This is equivalent to the following code:
/// ---
/// foreach (i, tok; tokens)
/// {
/// 	if (tok.index == bytes)
/// 		return i;
/// 	else if (tok.index > bytes)
/// 		return i - 1;
/// }
/// return tokens.length;
/// ---
size_t tokenIndexAtByteIndex(scope const(Token)[] tokens, size_t bytes)
out (v; v <= tokens.length)
{
	// the binary search below relies on tokens[0].index < bytes, so handle
	// the empty array and "at or before the first token" cases up front
	if (!tokens.length || tokens[0].index >= bytes)
		return 0;

	// find where to start using binary search
	size_t l = 0;
	size_t r = tokens.length - 1;
	while (l < r)
	{
		size_t m = (l + r) / 2;
		if (tokens[m].index < bytes)
			l = m + 1;
		else
			r = m - 1;
	}
	// `r` may undershoot an exact match (it is decremented past a token whose
	// index equals `bytes`); that is fine — it is only a conservative lower
	// bound which the forward linear scan below corrects
	size_t start = r;

	// search remaining with linear search
	foreach (i, tok; tokens[start .. $])
	{
		if (tok.index == bytes)
			return start + i;
		else if (tok.index > bytes)
			return start + i - 1;
	}
	return tokens.length;
}
150 
/// ditto
size_t tokenIndexAtPosition(scope const(Token)[] tokens, uint line, uint column)
out (v; v <= tokens.length)
{
	// Orders a token relative to the search position:
	// negative = token starts before (line, column), zero = exact match,
	// positive = token starts after the position.
	int cmp(Token token)
	{
		if (token.line != line)
			return token.line < line ? -1 : 1;
		else if (token.column != column)
			return token.column < column ? -1 : 1;
		else
			return 0;
	}

	// the binary search below relies on tokens[0] sorting before the search
	// position, so handle the empty and "at or before first token" cases here
	if (!tokens.length || cmp(tokens[0]) >= 0)
		return 0;

	// find where to start using binary search
	size_t l = 0;
	size_t r = tokens.length - 1;
	while (l < r)
	{
		size_t m = (l + r) / 2;
		if (cmp(tokens[m]) < 0)
			l = m + 1;
		else
			r = m - 1;
	}
	// `r` is a conservative lower bound (it may undershoot an exact match);
	// the forward linear scan below corrects for that
	size_t start = r;

	// search remaining with linear search, evaluating cmp only once per
	// token (the previous version called it twice per iteration)
	foreach (i, tok; tokens[start .. $])
	{
		const c = cmp(tok);
		if (c == 0)
			return start + i;
		else if (c > 0)
			return start + i - 1;
	}
	return tokens.length;
}
191 
192 ///
193 unittest
194 {
195 	StringCache stringCache = StringCache(StringCache.defaultBucketCount);
196 	const(Token)[] tokens = getTokensForParser(cast(ubyte[]) `module foo.bar;
197 
198 // ok
199 void main(string[] args)
200 {
201 }
202 
203 /// documentation
204 void foo()
205 {
206 }
207 `, LexerConfig.init, &stringCache);
208 
209 	auto get(size_t bytes)
210 	{
211 		auto i = tokens.tokenIndexAtByteIndex(bytes);
212 		if (i == tokens.length)
213 			return tok!"__EOF__";
214 		return tokens[i].type;
215 	}
216 
217 	assert(get(0) == tok!"module");
218 	assert(get(4) == tok!"module");
219 	assert(get(6) == tok!"module");
220 	assert(get(7) == tok!"identifier");
221 	assert(get(9) == tok!"identifier");
222 	assert(get(10) == tok!".");
223 	assert(get(11) == tok!"identifier");
224 	assert(get(16) == tok!";");
225 	assert(get(49) == tok!"{");
226 	assert(get(48) == tok!"{");
227 	assert(get(47) == tok!")");
228 	assert(get(1000) == tok!"__EOF__");
229 
230 	// TODO: process trivia fields in libdparse >=0.15.0 when it releases
231 	//assert(get(20) == tok!"comment");
232 	assert(get(20) == tok!";");
233 
234 	// assert(get(57) == tok!"comment");
235 }
236 
/// Checks if the token type is one of the three string literal kinds.
/// NOTE(review): unlike the `Token` overload above, `characterLiteral` is
/// deliberately not matched here — confirm whether that difference is intended.
bool isSomeString(const IdType type)
{
	return type.among(
		tok!"stringLiteral",
		tok!"wstringLiteral",
		tok!"dstringLiteral") != 0;
}
249 
/// Tries to evaluate an expression if it evaluates to a string.
/// Returns: `null` if the resulting value is not a string or could not be
/// evaluated.
string evaluateExpressionString(const PrimaryExpression expr)
in (expr !is null)
{
	return expr.primary.evaluateExpressionString;
}
258 
/// ditto
string evaluateExpressionString(const UnaryExpression expr)
in (expr !is null)
{
	auto primary = expr.primaryExpression;
	return primary is null ? null : evaluateExpressionString(primary);
}
268 
/// ditto
string evaluateExpressionString(const ExpressionNode expr)
in (expr !is null)
{
	// maybe we want to support simple concatenation here some time

	auto unary = cast(UnaryExpression) expr;
	return unary is null ? null : evaluateExpressionString(unary);
}
280 
/// ditto
string evaluateExpressionString(const Token token)
{
	import dparse.strings : unescapeString, isStringLiteral;

	switch (token.type)
	{
	case tok!"stringLiteral":
	case tok!"wstringLiteral":
	case tok!"dstringLiteral":
		auto str = token.text;

		// we want to unquote here
		// foreach because implicit concatenation can combine multiple strings
		auto ret = appender!string;
		scope StringCache cache = StringCache(16);
		LexerConfig config;
		config.commentBehavior = CommentBehavior.noIntern;
		config.stringBehavior = StringBehavior.source;
		config.whitespaceBehavior = WhitespaceBehavior.skip;
		config.fileName = "evaluate-string-stdin";
		// re-lex the raw literal text so each piece of an implicitly
		// concatenated string shows up as its own token
		foreach (t; DLexer(str, config, &cache))
		{
			switch (t.type)
			{
			case tok!"stringLiteral":
			case tok!"wstringLiteral":
			case tok!"dstringLiteral":
				if (t.text.isStringLiteral)
				{
					ret ~= unescapeString(t.text);
				}
				else
				{
					// the lexer classified this as a string token but it does
					// not parse as one — hard-fail in debug, best-effort in
					// release builds by returning the raw input
					debug
					{
						throw new Exception("Invalid stringLiteral in stringLiteral token: `" ~ t.text ~ '`');
					}
					else
					{
						warningf("Invalid stringLiteral in stringLiteral token: `%s`", t.text);
						return str;
					}
				}
				break;
			default:
				// unexpected token, return input because it might already be
				// unescaped
				return str;
			}
		}

		return ret.data;
	default:
		return null;
	}
}
338 
/// Finds the deepest non-null node of any BaseNode. (like visiting the tree)
/// Aborts on types that contain `DeclarationOrStatement` or `Declaration[]`
/// fields.
/// Useful for getting the IfStatement out of a DeclarationOrStatement without
/// traversing its children.
BaseNode findDeepestNonBlockNode(T : BaseNode)(T ast)
{
	static assert(!is(T == BaseNode), "Passed in a BaseNode, that's probably not what you wanted to do (pass in the most specific type you have)");
	// first pass (checked per-field at compile time): if this node type can
	// hold statement/declaration blocks, stop here instead of descending
	bool nonProcess = false;
	foreach (member; ast.tupleof)
	{
		static if (is(typeof(member) : DeclarationOrStatement)
			|| is(typeof(member) : Declaration[]))
		{
			nonProcess = true;
		}
	}

	if (nonProcess)
		return ast;

	// second pass: recurse into the first non-null child node
	foreach (member; ast.tupleof)
	{
		static if (is(typeof(member) : BaseNode))
		{
			if (member !is null)
			{
				return findDeepestNonBlockNode(member);
			}
		}
	}
	// leaf node: no child nodes at all
	return ast;
}
372 
/// Gets the final `else` block of an if. Will return a node of type
/// `IfStatement` if it's an `else if` block. Returns null if there is no single
/// else statement.
BaseNode getIfElse(IfStatement ifStmt)
{
	// no else branch at all on the outermost if
	if (!ifStmt.elseStatement)
		return null;

	// walk down the `else if` chain
	while (true)
	{
		auto elseStmt = ifStmt.elseStatement;
		// the last `else if` has no else of its own: it is the final block
		if (!elseStmt)
			return ifStmt;

		// unwrap the statement wrapper to the actual statement node
		auto stmtInElse = elseStmt.findDeepestNonBlockNode;
		assert(stmtInElse !is elseStmt);

		if (cast(IfStatement)stmtInElse)
			ifStmt = cast(IfStatement)stmtInElse;
		else
			return stmtInElse;
	}
}
396 
unittest
{
	StringCache stringCache = StringCache(StringCache.defaultBucketCount);
	RollbackAllocator rba;
	// helper: lex and parse `code` as a single IfStatement
	IfStatement parseIfStmt(string code)
	{
		const(Token)[] tokens = getTokensForParser(cast(ubyte[])code, LexerConfig.init, &stringCache);
		auto parser = new Parser();
		parser.tokens = tokens;
		parser.allocator = &rba;
		return parser.parseIfStatement();
	}

	alias p = parseIfStmt;
	// no else -> null
	assert(getIfElse(p("if (x) {}")) is null);
	// trailing `else if` -> the returned node is that IfStatement
	assert(getIfElse(p("if (x) {} else if (y) {}")) !is null);
	assert(cast(IfStatement)getIfElse(p("if (x) {} else if (y) {}")) !is null, typeid(getIfElse(p("if (x) {} else if (y) {}"))).name);
	// plain final else -> non-null, but not an IfStatement
	assert(getIfElse(p("if (x) {} else if (y) {} else {}")) !is null);
	assert(cast(IfStatement)getIfElse(p("if (x) {} else if (y) {} else {}")) is null);
}
417 
/// Slices `s` from `range[0]` to `range[1]`, clamping out-of-bounds indices
/// instead of throwing (see the two-index overload for details).
C[] substr(C)(C[] s, size_t[2] range)
{
	return s.substr(range[0], range[1]);
}
422 
/// Slices `s` from `start` to `end`, clamping instead of throwing on
/// out-of-range indices:
/// $(UL
/// $(LI an empty input is returned unchanged)
/// $(LI `start` past the end is clamped to the last index)
/// $(LI `end` past the end is clamped to `s.length`)
/// $(LI if `end < start` after clamping, an empty slice at `start` is returned))
C[] substr(C)(C[] s, size_t start, size_t end)
{
	if (!s.length)
		return s;
	// note: start/end are unsigned, so a `start < 0` check would be dead code
	if (start >= s.length)
		start = s.length - 1; // @suppress(dscanner.suspicious.length_subtraction)
	if (end > s.length)
		end = s.length;
	if (end < start)
		return s[start .. start];
	return s[start .. end];
}