1 module served.lsp.textdocumentmanager;
2 
3 import std.algorithm;
4 import std.experimental.logger;
5 import std.json;
6 import std.string;
7 import std.utf : codeLength, decode, UseReplacementDchar;
8 
9 import served.lsp.jsonrpc;
10 import served.lsp.protocol;
11 
12 /// in-memory representation of a file at any given URI. Not thread-safe.
13 struct Document
14 {
15 	/// The URI of this document. Should not be changed.
16 	DocumentUri uri;
17 	/// The language ID as reported by the client. Should not be changed.
18 	string languageId;
19 	/// The document version as reported by the client. Should not be changed.
20 	long version_;
21 	private char[] text;
22 
23 	/// Returns the language ID or guesses it given the filename's extension.
24 	/// Returns null if none is set and can't be guessed.
25 	///
26 	/// Guessing Map:
27 	/// * `.d|.di` = `"d"`
28 	/// * `.dpp` = `"dpp"`
29 	/// * `.c` = `"c"`
30 	/// * `.cpp` = `"cpp"`
31 	/// * `.ds|.dscript` = `"dscript"`
32 	/// * `.dml` = `"dml"`
33 	/// * `.sdl` = `"sdl"`
34 	/// * `.dt` = `"diet"`
35 	/// * `.json` = `"json"`
36 	string getLanguageId() const @property @trusted @nogc nothrow
37 	{
38 		if (!languageId.length)
39 		{
40 			import std.path : extension;
41 			import std.uni : sicmp;
42 
43 			const ext = uri.extension;
44 			if (ext.sicmp(".d") == 0 || ext.sicmp(".di") == 0)
45 				return "d";
46 			else if (ext.sicmp(".dpp") == 0)
47 				return "dpp";
48 			else if (ext.sicmp(".c") == 0)
49 				return "c";
50 			else if (ext.sicmp(".cpp") == 0)
51 				return "cpp";
52 			else if (ext.sicmp(".ds") == 0 || ext.sicmp(".dscript") == 0)
53 				return "dscript";
54 			else if (ext.sicmp(".dml") == 0)
55 				return "dml";
56 			else if (ext.sicmp(".sdl") == 0)
57 				return "sdl";
58 			else if (ext.sicmp(".dt") == 0)
59 				return "diet";
60 			else if (ext.sicmp(".json") == 0)
61 				return "json";
62 			else
63 				return null;
64 		}
65 
66 		return languageId;
67 	}
68 
	///
	unittest
	{
		Document d;
		// neither a URI nor a language ID: nothing to guess from
		assert(d.getLanguageId == null);
		d.uri = "file:///home/project/app.d";
		// guessed from the `.d` extension of the URI
		assert(d.getLanguageId == "d");
		d.languageId = "cpp";
		// an explicitly set language ID takes priority over the extension
		assert(d.getLanguageId == "cpp");
	}
79 
80 	/// Creates a new document at the given document URI, with version 0 and
81 	/// no text and guessed language ID. See $(LREF getLanguageId)
82 	this(DocumentUri uri)
83 	{
84 		this.uri = uri;
85 		languageId = getLanguageId;
86 		version_ = 0;
87 		text = null;
88 	}
89 
	///
	unittest
	{
		// constructing from a URI only: language is guessed, everything else is empty
		auto doc = Document("file:///home/projects/app.d");
		assert(doc.uri == "file:///home/projects/app.d");
		assert(doc.languageId == "d");
		assert(doc.version_ == 0);
		assert(!doc.rawText.length);
	}
99 
100 	/// Creates a new document at the given document URI, with the given version
101 	/// and language and creates a copy of the text to use.
102 	this(TextDocumentItem doc)
103 	{
104 		uri = doc.uri;
105 		languageId = doc.languageId;
106 		version_ = doc.version_;
107 		text = doc.text.dup;
108 	}
109 
	///
	unittest
	{
		// e.g. received from LSP client
		TextDocumentItem item = {
			uri: "file:///home/projects/app.c",
			languageId: "cpp",
			version_: 0,
			text: "#include <stdio>",
		};
		auto doc = Document(item);
		// the text of the item has been copied into the document
		assert(doc.length == "#include <stdio>".length);
	}
123 
124 	/// Creates a document with no URI and no language ID and copies the content
125 	/// into the text buffer using $(LREF setContent).
126 	static Document nullDocument(scope const(char)[] content)
127 	{
128 		Document ret;
129 		ret.setContent(content);
130 		return ret;
131 	}
132 
	///
	unittest
	{
		// only the text is set, all metadata stays at its default value
		auto doc = Document.nullDocument(`import std.stdio;`);
		assert(!doc.languageId.length);
		assert(doc.version_ == 0);
		assert(!doc.uri.length);
		assert(doc.rawText == "import std.stdio;");
	}
142 
143 	/// Returns a copy of this document with the text memory duplicated.
144 	/// May safely be cast to immutable.
145 	Document clone() const
146 	{
147 		Document ret;
148 		ret.uri = uri;
149 		ret.version_ = version_;
150 		ret.languageId = languageId;
151 		ret.text = text.dup;
152 		return ret;
153 	}
154 
155 	version (unittest) private static Document nullDocumentOwnMemory(char[] content)
156 	{
157 		Document ret;
158 		ret.text = content;
159 		return ret;
160 	}
161 
162 	/// Returns a read-only view of the text. The text may however be changed
163 	/// by other operations, so this slice should be used directly and not after
164 	/// any context yield or API call potentially modifying the data.
165 	///
166 	/// If used on an immutable Document, the text cannot be changed and thus
167 	/// returns a full string instead of a const(char)[] slice.
168 	const(char)[] rawText() const
169 	{
170 		return text;
171 	}
172 
173 	/// ditto
174 	string rawText() immutable
175 	{
176 		return text;
177 	}
178 
179 	/// Returns the text length.
180 	size_t length() const @property
181 	{
182 		return text.length;
183 	}
184 
185 	/// Sets the content of this document to the given content. Copies the data
186 	/// from newContent into this text buffer.
187 	///
188 	/// Should not be called as an API unless managing some kind of virtual
189 	/// document manually.
190 	ref typeof(this) setContent(scope const(char)[] newContent) return
191 	{
192 		if (newContent.length < text.length)
193 		{
194 			text.ptr[0 .. newContent.length] = newContent;
195 			text.ptr[newContent.length] = '\0'; // insert null byte to find corruptions
196 			text.length = newContent.length;
197 			text = text.assumeSafeAppend;
198 		}
199 		else
200 		{
201 			text = text.assumeSafeAppend;
202 			text.length = newContent.length;
203 			text[0 .. $] = newContent;
204 		}
205 		return this;
206 	}
207 
208 	///
209 	void applyChange(TextRange range, scope const(char)[] newContent)
210 	{
211 		auto start = positionToBytes(range[0]);
212 		auto end = positionToBytes(range[1]);
213 
214 		if (start > end)
215 			swap(start, end);
216 
217 		if (start == 0 && end == text.length)
218 		{
219 			setContent(newContent);
220 			return;
221 		}
222 
223 		auto addition = newContent.representation;
224 		int removed = cast(int) end - cast(int) start;
225 		int added = cast(int) addition.length - removed;
226 		text = text.assumeSafeAppend;
227 		if (added > 0)
228 		{
229 			text.length += added;
230 			// text[end + added .. $] = text[end .. $ - added];
231 			for (int i = cast(int) text.length - 1; i >= end + added; i--)
232 				text[i] = text[i - added];
233 		}
234 		else if (added < 0)
235 		{
236 			for (size_t i = start; i < text.length + added; i++)
237 				text[i] = text[i - added];
238 
239 			text = text[0 .. $ + added];
240 		}
241 		text = text.assumeSafeAppend;
242 
243 		foreach (i, c; addition)
244 			text[start + i] = cast(char) c;
245 	}
246 
247 	/// Converts an LSP offset to a byte offset for using for example in array
248 	/// slicing.
249 	size_t offsetToBytes(size_t offset) const
250 	{
251 		return .countBytesUntilUTF16Index(text, offset);
252 	}
253 
254 	/// Converts a byte offset to an LSP offset.
255 	size_t bytesToOffset(size_t bytes) const
256 	{
257 		return .countUTF16Length(text[0 .. min($, bytes)]);
258 	}
259 
260 	/// Converts a line/column position to an LSP offset.
261 	size_t positionToOffset(Position position) const
262 	{
263 		size_t offset = 0;
264 		size_t bytes = 0;
265 		while (bytes < text.length && position.line > 0)
266 		{
267 			const c = text.ptr[bytes];
268 			if (c == '\n')
269 				position.line--;
270 			utf16DecodeUtf8Length(c, offset, bytes);
271 		}
272 
273 		while (bytes < text.length && position.character > 0)
274 		{
275 			const c = text.ptr[bytes];
276 			if (c == '\n')
277 				break;
278 			size_t utf16Size;
279 			utf16DecodeUtf8Length(c, utf16Size, bytes);
280 			if (utf16Size < position.character)
281 				position.character -= utf16Size;
282 			else
283 				position.character = 0;
284 			offset += utf16Size;
285 		}
286 		return offset;
287 	}
288 
289 	/// Converts a line/column position to a byte offset.
290 	size_t positionToBytes(Position position) const
291 	{
292 		size_t index = 0;
293 		while (index < text.length && position.line > 0)
294 			if (text.ptr[index++] == '\n')
295 				position.line--;
296 
297 		while (index < text.length && position.character > 0)
298 		{
299 			const c = text.ptr[index];
300 			if (c == '\n')
301 				break;
302 			size_t utf16Size;
303 			utf16DecodeUtf8Length(c, utf16Size, index);
304 			if (utf16Size < position.character)
305 				position.character -= utf16Size;
306 			else
307 				position.character = 0;
308 		}
309 		return index;
310 	}
311 
312 	/// Converts an LSP offset to a line/column position.
313 	Position offsetToPosition(size_t offset) const
314 	{
315 		size_t bytes;
316 		size_t index;
317 		size_t lastNl = -1;
318 
319 		Position ret;
320 		while (bytes < text.length && index < offset)
321 		{
322 			const c = text.ptr[bytes];
323 			if (c == '\n')
324 			{
325 				ret.line++;
326 				lastNl = index;
327 			}
328 			utf16DecodeUtf8Length(c, index, bytes);
329 		}
330 		const start = lastNl + 1;
331 		ret.character = cast(uint)(index - start);
332 		return ret;
333 	}
334 
335 	/// Converts a byte offset to a line/column position.
336 	Position bytesToPosition(size_t bytes) const
337 	{
338 		if (bytes > text.length)
339 			bytes = text.length;
340 		auto part = text.ptr[0 .. bytes].representation;
341 		size_t lastNl = -1;
342 		Position ret;
343 		foreach (i; 0 .. bytes)
344 		{
345 			if (part.ptr[i] == '\n')
346 			{
347 				ret.line++;
348 				lastNl = i;
349 			}
350 		}
351 		ret.character = cast(uint)(cast(const(char)[]) part[lastNl + 1 .. $]).countUTF16Length;
352 		return ret;
353 	}
354 
355 	/// Converts a line/column byte offset to a line/column position.
356 	Position lineColumnBytesToPosition(uint line, uint column) const
357 	{
358 		scope lineText = lineAtScope(line).chomp();
359 		uint offset = 0;
360 		// keep over-extending positions
361 		if (column > lineText.length)
362 		{
363 			offset = column - cast(uint)lineText.length;
364 			column = cast(uint)lineText.length;
365 		}
366 		// utf16 length is always gonna be less than byte length, so adding offset will never overflow
367 		return Position(line, cast(uint)lineText[0 .. column].countUTF16Length + offset);
368 	}
369 
370 	/// Returns the position at "end" starting from the given "src" position which is assumed to be at byte "start"
371 	/// Faster to quickly calculate nearby positions of known byte positions.
372 	/// Falls back to $(LREF bytesToPosition) if end is before start.
373 	Position movePositionBytes(Position src, size_t start, size_t end) const
374 	{
375 		if (end == start)
376 			return src;
377 		if (end < start)
378 			return bytesToPosition(end);
379 
380 		auto t = text.ptr[min(text.length, start) .. min(text.length, end)];
381 		size_t bytes;
382 		while (bytes < t.length)
383 		{
384 			const c = t.ptr[bytes];
385 			if (c == '\n')
386 			{
387 				src.line++;
388 				src.character = 0;
389 				bytes++;
390 			}
391 			else
392 				utf16DecodeUtf8Length(c, src.character, bytes);
393 		}
394 		return src;
395 	}
396 
	///
	unittest
	{
		import std.regex;

		// matches the standalone word `int`, but not the `int` inside `uint`
		auto intRegex = regex(`\bint\b`);

		Document d;
		d.setContent("int foo(int x, uint y)\n{\n    return cast(int)(x + y);\n}\n");

		// either use size_t.max or 0, both work as starting points for different reasons:
		// - 0 always matches Position.init, so the offset can be calculated
		// - size_t.max is larger than the checked index match, so position is recomputed
		size_t lastIndex = size_t.max;
		Position lastPosition;

		Position[] matches;

		foreach (match; d.rawText.matchAll(intRegex))
		{
			// byte index of the match inside the document text
			size_t index = match.pre.length;
			// to reduce boilerplate, use d.nextPositionBytes instead!
			auto pos = d.movePositionBytes(lastPosition, lastIndex, index);
			lastIndex = index;
			lastPosition = pos;
			matches ~= pos;
		}

		assert(matches == [
			Position(0, 0),
			Position(0, 8),
			Position(2, 16)
		]);
	}
431 
432 	/// Calls $(LREF movePositionBytes), updates src to be the return value and
433 	/// updates start to become end. This reduces boilerplate in common calling
434 	/// scenarios.
435 	Position nextPositionBytes(ref Position src, ref size_t start, size_t end) const
436 	{
437 		auto pos = movePositionBytes(src, start, end);
438 		src = pos;
439 		start = end;
440 		return pos;
441 	}
442 
	///
	unittest
	{
		import std.regex;

		// matches the standalone word `int`, but not the `int` inside `uint`
		auto intRegex = regex(`\bint\b`);

		Document d;
		d.setContent("int foo(int x, uint y)\n{\n    return cast(int)(x + y);\n}\n");

		// size_t.max as start forces the first position to be computed from scratch
		size_t lastIndex = size_t.max;
		Position lastPosition;

		Position[] matches;
		foreach (match; d.rawText.matchAll(intRegex))
			matches ~= d.nextPositionBytes(lastPosition, lastIndex, match.pre.length);

		assert(matches == [
			Position(0, 0),
			Position(0, 8),
			Position(2, 16)
		]);
	}
466 
467 	/// Returns the word range at a given line/column position.
468 	TextRange wordRangeAt(Position position) const
469 	{
470 		auto chars = wordInLine(lineAtScope(position), position.character);
471 		return TextRange(Position(position.line, chars[0]), Position(position.line, chars[1]));
472 	}
473 
	///
	unittest
	{
		Document d;
		d.setContent(`void main() { writeln("hello world"); }`);
		// the word at the very start of the line is `void`, columns 0 to 4
		assert(d.wordRangeAt(Position(0, 0)) == TextRange(0, 0, 0, 4));
	}
481 
482 	/// Returns the word range at a given byte position.
483 	size_t[2] wordRangeAt(size_t bytes) const
484 	{
485 		auto lineStart = text.lastIndexOf('\n', bytes) + 1;
486 		auto ret = wordInLineBytes(
487 			text.ptr[lineStart .. text.length],
488 			cast(uint)(bytes - lineStart));
489 		ret[0] += lineStart;
490 		ret[1] += lineStart;
491 		return ret;
492 	}
493 
	///
	unittest
	{
		Document d;
		d.setContent(`void main() { writeln("hello world"); }`);
		// `void`
		assert(d.wordRangeAt(0) == [0, 4]);
		assert(d.wordRangeAt(3) == [0, 4]);
		assert(d.wordRangeAt(4) == [0, 4]);
		// `main`
		assert(d.wordRangeAt(5) == [5, 9]);
		assert(d.wordRangeAt(9) == [5, 9]);
		// between `(` and `)`: empty range
		assert(d.wordRangeAt(10) == [10, 10]);
		// `writeln`
		assert(d.wordRangeAt(14) == [14, 21]);
		assert(d.wordRangeAt(20) == [14, 21]);
		assert(d.wordRangeAt(21) == [14, 21]);
		// `hello`
		assert(d.wordRangeAt(23) == [23, 28]);
		assert(d.wordRangeAt(27) == [23, 28]);
		assert(d.wordRangeAt(28) == [23, 28]);
		// `world`
		assert(d.wordRangeAt(29) == [29, 34]);
		assert(d.wordRangeAt(30) == [29, 34]);
		assert(d.wordRangeAt(34) == [29, 34]);
	}
515 
516 	/// Returns a byte offset range as `[start, end]` of the given 0-based line
517 	/// number. Contains the line terminator, if it exists.
518 	size_t[2] lineByteRangeAt(uint line) const
519 	{
520 		size_t start = 0;
521 		size_t index = 0;
522 		while (line > 0 && index < text.length)
523 		{
524 			const c = text.ptr[index++];
525 			if (c == '\n')
526 			{
527 				line--;
528 				start = index;
529 			}
530 		}
531 		// if !found
532 		if (line != 0)
533 			return [0, 0];
534 
535 		auto end = text.indexOf('\n', start);
536 		if (end == -1)
537 			end = text.length;
538 		else
539 			end++;
540 
541 		return [start, end];
542 	}
543 
544 	/// Returns the text of a line at the given position.
545 	///
546 	/// Contains the line terminator, if it exists.
547 	///
548 	/// The overload taking in a position just calls the overload taking a line
549 	/// with the line being the position's line.
550 	string lineAt(Position position) const
551 	{
552 		return lineAt(position.line);
553 	}
554 
555 	/// ditto
556 	string lineAt(Position position) immutable
557 	{
558 		return lineAt(position.line);
559 	}
560 
561 	/// Returns the text of a line starting at line 0.
562 	///
563 	/// Contains the line terminator, if it exists.
564 	string lineAt(uint line) const
565 	{
566 		return lineAtScope(line).idup;
567 	}
568 
569 	/// ditto
570 	string lineAt(uint line) immutable
571 	{
572 		return lineAtScope(line);
573 	}
574 
	///
	unittest
	{
		Document d = Document("file:///home/projects/app.d");
		d.setContent("im");

		// clone duplicates the text, so casting the copy to immutable is fine
		immutable d2 = cast(immutable)d.clone.setContent("import std.stdio;\nvoid main() {}");

		// lineAtScope mirrors the mutability of the document, lineAt always returns a string
		static assert(is(typeof(d.lineAtScope(0)) == const(char)[]));
		static assert(is(typeof(d2.lineAtScope(0)) == string));
		static assert(is(typeof(d.lineAt(0)) == string));
		static assert(is(typeof(d2.lineAt(0)) == string));

		assert(d.lineAt(0) == "im");
		assert(d2.lineAt(0) == "import std.stdio;\n");

		assert(d.lineAtScope(0) == "im");
		assert(d2.lineAtScope(0) == "import std.stdio;\n");

		// mutable documents copy the line, immutable documents return a slice of their text
		assert(d.lineAt(0).ptr !is d.rawText.ptr);
		assert(d2.lineAt(0).ptr is d2.rawText.ptr);
	}
597 
598 	/// Returns the line text at the given position. The memory content may be
599 	/// modified by the $(LREF setContent) method by other code in the same
600 	/// context or in a different context.
601 	///
602 	/// The overload taking in a position just calls the overload taking a line
603 	/// with the line being the position's line.
604 	///
605 	/// Contains the line terminator, if it exists.
606 	///
607 	/// See_Also: $(LREF lineAt) to get the same content, but with duplicated
608 	/// memory, so it can be stored for later use.
609 	scope auto lineAtScope(Position position) const inout
610 	{
611 		return lineAtScope(position.line);
612 	}
613 
614 	/// ditto
615 	scope auto lineAtScope(uint line) const inout
616 	{
617 		auto range = lineByteRangeAt(line);
618 		return text[range[0] .. range[1]];
619 	}
620 
	///
	unittest
	{
		// assert with a readable failure message showing both values
		void assertEqual(A, B)(A a, B b)
		{
			import std.conv : to;

			assert(a == b, a.to!string ~ " is not equal to " ~ b.to!string);
		}

		Document doc;
		doc.setContent(`abc
hellö world
how åre
you?`);
		// the column of the position is irrelevant, even if it is past the end of the line
		assertEqual(doc.lineAt(Position(0, 0)), "abc\n");
		assertEqual(doc.lineAt(Position(0, 100)), "abc\n");
		assertEqual(doc.lineAt(Position(1, 3)), "hellö world\n");
		assertEqual(doc.lineAt(Position(2, 0)), "how åre\n");
		// the last line has no line terminator
		assertEqual(doc.lineAt(Position(3, 0)), "you?");
		assertEqual(doc.lineAt(Position(3, 8)), "you?");
		// lines past the end of the document are empty
		assertEqual(doc.lineAt(Position(4, 0)), "");
	}
644 
645 	/// Returns how a line is terminated at the given 0-based line number.
646 	/// Defaults to LF for the last line / no line terminator.
647 	EolType eolAt(int line) const
648 	{
649 		size_t index = 0;
650 		int curLine = 0;
651 		bool prevWasCr = false;
652 		while (index < text.length)
653 		{
654 			auto c = decode!(UseReplacementDchar.yes)(text, index);
655 			if (c == '\n')
656 			{
657 				if (curLine == line)
658 				{
659 					return prevWasCr ? EolType.crlf : EolType.lf;
660 				}
661 				curLine++;
662 			}
663 			prevWasCr = c == '\r';
664 		}
665 		return EolType.lf;
666 	}
667 
	///
	unittest
	{
		auto d = Document("file:///home/projects/app.d");
		d.setContent("import std.stdio;\nvoid main() {\r\n\twriteln(`hello world`);\r}");
		// \r is not supported as line terminator
		assert(d.lineAt(2) == "\twriteln(`hello world`);\r}");

		// line 0 ends in \n, line 1 in \r\n, line 2 has no line feed and defaults to LF
		assert(d.eolAt(0) == EolType.lf);
		assert(d.eolAt(1) == EolType.crlf);
		assert(d.eolAt(2) == EolType.lf);
	}
680 }
681 
682 /// Helper struct which should have one unique instance in the application which
683 /// processes document events sent by a LSP client to an LSP server and creates
684 /// an in-memory representation of all the files managed by the client.
685 ///
686 /// This data structure is not thread safe.
687 struct TextDocumentManager
688 {
689 	/// Internal document storage. Only iterate over this using `foreach`, other
690 	/// operations are not considered officially supported.
691 	Document[] documentStore;
692 
693 	/// Same as $(LREF tryGet) but throws an exception if the URI doesn't exist.
694 	ref Document opIndex(string uri)
695 	{
696 		auto idx = documentStore.countUntil!(a => a.uri == uri);
697 		if (idx == -1)
698 			throw new Exception("Document '" ~ uri ~ "' not found");
699 		return documentStore[idx];
700 	}
701 
702 	/// Tries to get a document from a URI, returns Document.init if it is not
703 	/// in the in-memory cache / not sent by the client.
704 	Document tryGet(string uri)
705 	{
706 		auto idx = documentStore.countUntil!(a => a.uri == uri);
707 		if (idx == -1)
708 			return Document.init;
709 		return documentStore[idx];
710 	}
711 
	// Removed API, kept only as a migration hint: because this is a template
	// (empty template parameter list), the static assert only fires when the
	// function is actually instantiated, i.e. when some code still calls it.
	deprecated ref Document loadFromFilesystem()(string uri)
	{
		static assert(false, "use getOrFromFilesystem instead (LSP open takes priority over filesystem)");
	}
716 
717 	/// Returns the managed document for the given URI or if it doesn't exist
718 	/// it tries to read the file from the filesystem and open it from that.
719 	///
720 	/// Note that a LSP close method will unload this early.
721 	///
722 	/// Params:
723 	///     uri = the document URI to try to load. Must be consistent with LSP
724 	///           URIs. (e.g. normalized URIs)
725 	///     inserted = if specified, gets set to true if the file was read from
726 	///                filesystem and false if it was already present.
727 	///
728 	/// Returns: the created document
729 	///
730 	/// Throws: FileException in case the file doesn't exist or other file
731 	///         system errors. In this case no new document should have been
732 	///         inserted yet.
733 	ref Document getOrFromFilesystem(string uri, out bool inserted)
734 	{
735 		import served.lsp.uri : uriToFile;
736 		import fs = std.file;
737 
738 		auto idx = documentStore.countUntil!(a => a.uri == uri);
739 		if (idx != -1)
740 		{
741 			inserted = false;
742 			return documentStore[idx];
743 		}
744 
745 		string path = uriToFile(uri);
746 		auto content = fs.readText(path);
747 
748 		auto index = documentStore.length++;
749 		documentStore[index].uri = uri;
750 		documentStore[index].version_ = -1;
751 		documentStore[index].setContent(content);
752 		inserted = true;
753 		return documentStore[index];
754 	}
755 
	///
	unittest
	{
		import served.lsp.uri;

		import std.file;
		import std.path;

		// work inside a scratch directory that is removed again afterwards
		auto dir = buildPath(tempDir(), "textdocumentmanager");
		mkdir(dir);
		scope (exit)
			rmdirRecurse(dir);

		auto app_d = buildPath(dir, "app.d");
		auto src = "import std.stdio; void main() { writeln(`hello world`); }";
		write(app_d, src);

		TextDocumentManager documents;
		bool created;
		// first access reads the file from disk
		auto doc = &documents.getOrFromFilesystem(uriFromFile(app_d), created);
		assert(created);
		// second access returns the very same stored document
		auto other = &documents.getOrFromFilesystem(uriFromFile(app_d));
		assert(doc is other);

		// same content, but stored in the document's own buffer
		assert(doc.rawText == src);
		assert(doc.rawText !is src);
	}
783 
784 	/// ditto
785 	ref Document getOrFromFilesystem(string uri)
786 	{
787 		bool b;
788 		return getOrFromFilesystem(uri, b);
789 	}
790 
791 	/// Unloads the given URI so it's no longer accessible. Note that this
792 	/// should only be done for documents loaded manually and never for LSP
793 	/// documents as it will break all features in that file until reopened.
794 	bool unloadDocument(string uri)
795 	{
796 		auto idx = documentStore.countUntil!(a => a.uri == uri);
797 		if (idx == -1)
798 			return false;
799 
800 		documentStore[idx] = documentStore[$ - 1];
801 		documentStore.length--;
802 		documentStore = documentStore.assumeSafeAppend;
803 		return true;
804 	}
805 
806 	/// Returns the currently preferred syncKind to use with the client.
807 	/// Additionally always supports the `full` sync kind.
808 	static TextDocumentSyncKind syncKind()
809 	{
810 		return TextDocumentSyncKind.incremental;
811 	}
812 
	///
	unittest
	{
		// syncKind is static, no manager instance is needed
		assert(TextDocumentManager.syncKind == TextDocumentSyncKind.incremental);
	}
818 
819 	/// Inserts a document manually or updates an existing one, acting like
820 	/// textDocument/didOpen if it didn't exist or fully replacing the document
821 	/// if it did exist.
822 	ref Document insertOrUpdate(Document d)
823 	{
824 		auto idx = documentStore.countUntil!(a => a.uri == d.uri);
825 		if (idx != -1)
826 		{
827 			return documentStore[idx] = d;
828 		}
829 		else
830 		{
831 			auto index = documentStore.length++;
832 			return documentStore[index] = d;
833 		}
834 	}
835 
836 	/// Processes an LSP packet and performs the document update in-memory that
837 	/// is requested.
838 	/// Params:
839 	///   msg = The request sent by a client. This method only processes
840 	///     `textDocument/` messages which are relevant to file modification.
841 	/// Returns: `true` if the given method was handled, `false` otherwise.
842 	bool process(RequestMessageRaw msg)
843 	{
844 		if (msg.method == "textDocument/didOpen")
845 		{
846 			auto params = msg.paramsJson.deserializeJson!DidOpenTextDocumentParams;
847 			// there may be at most one didOpen request, but library code can
848 			// load files from the filesystem 
849 			insertOrUpdate(Document(params.textDocument));
850 			return true;
851 		}
852 		else if (msg.method == "textDocument/didClose")
853 		{
854 			auto params = msg.paramsJson.deserializeJson!DidCloseTextDocumentParams;
855 			auto targetUri = params.textDocument.uri;
856 			if (!unloadDocument(targetUri))
857 			{
858 				warning("Received didClose notification for URI not in system: ", targetUri);
859 				warning("This can be a potential memory leak if it was previously opened under a different name.");
860 			}
861 			return true;
862 		}
863 		else if (msg.method == "textDocument/didChange")
864 		{
865 			auto params = msg.paramsJson.deserializeJson!DidChangeTextDocumentParams;
866 			auto targetUri = params.textDocument.uri;
867 			auto idx = documentStore.countUntil!(a => a.uri == targetUri);
868 			if (idx >= 0)
869 			{
870 				documentStore[idx].version_ = params.textDocument.version_;
871 				foreach (change; params.contentChanges)
872 				{
873 					if (!change.range.isNone)
874 						documentStore[idx].applyChange(change.range.deref, change.text);
875 					else
876 						documentStore[idx].setContent(change.text);
877 				}
878 			}
879 			return true;
880 		}
881 		return false;
882 	}
883 }
884 
///
unittest
{
	import std.exception;

	// Walkthrough of a typical document life cycle: open, incremental change,
	// full replacement and close.
	TextDocumentManager documents;
	// most common usage, forward LSP events to this helper struct.
	RequestMessageRaw incomingPacket = {
		// dummy data
		method: "textDocument/didOpen",
		paramsJson: `{
			"textDocument": {
				"uri": "file:///home/projects/app.d",
				"languageId": "d",
				"version": 123,
				"text": "import std.stdio;\n\nvoid main()\n{\n\twriteln(\"hello world\");\n}\n"
			}
		}`
	};
	documents.process(incomingPacket);
	// documents.process returns false if it's not a method meant for text
	// document management. serve-d:serverbase abstracts this away automatically.

	// normally used from LSP methods where you have params like this
	TextDocumentPositionParams params = {
		textDocument: TextDocumentIdentifier("file:///home/projects/app.d"),
		position: Position(4, 2)
	};

	// if it's sent by the LSP, the document being loaded should be almost guaranteed.
	auto doc = documents[params.textDocument.uri];
	// trying to index files that haven't been sent by the client will throw an Exception
	assertThrown(documents["file:///path/to/non-registered.d"]);

	// you can use tryGet to see if a Document has been opened yet and use it if so.
	assert(documents.tryGet("file:///path/to/non-registered.d") is Document.init);
	assert(documents.tryGet(params.textDocument.uri) !is Document.init);

	// Document defines a variety of utility functions that have been optimized
	// for speed and convenience.
	assert(doc.lineAtScope(params.position) == "\twriteln(\"hello world\");\n");

	// the word at line 4, column 2 is `writeln`
	auto range = doc.wordRangeAt(params.position);
	assert(doc.positionToBytes(range.start) == 34);
	assert(doc.positionToBytes(range.end) == 41);

	// when yielding (Fiber context switch) documents may be modified or deleted though:

	// incremental change: delete columns 6 to 8 of line 4 (the `ln` of `writeln`)
	RequestMessageRaw incomingPacket2 = {
		// dummy data
		method: "textDocument/didChange",
		paramsJson: `{
			"textDocument": {
				"uri": "file:///home/projects/app.d",
				"version": 124
			},
			"contentChanges": [
				{
					"range": {
						"start": { "line": 4, "character": 6 },
						"end": { "line": 4, "character": 8 }
					},
					"text": ""
				}
			]
		}`
	};
	documents.process(incomingPacket2);

	assert(doc.lineAtScope(params.position) == "\twrite(\"hello world\");\n");

	// change without a range: replaces the whole text
	RequestMessageRaw incomingPacket3 = {
		// dummy data
		method: "textDocument/didChange",
		paramsJson: `{
			"textDocument": {
				"uri": "file:///home/projects/app.d",
				"version": 125
			},
			"contentChanges": [
				{
					"text": "replace everything"
				}
			]
		}`
	};
	documents.process(incomingPacket3);

	// doc.rawText is now half overwritten, you need to refetch a document when yielding or updating:
	assert(doc.rawText != "replace everything");
	doc = documents[params.textDocument.uri];
	assert(doc.rawText == "replace everything");

	RequestMessageRaw incomingPacket4 = {
		// dummy data
		method: "textDocument/didClose",
		paramsJson: `{
			"textDocument": {
				"uri": "file:///home/projects/app.d"
			}
		}`
	};
	documents.process(incomingPacket4);

	// after didClose the document is no longer managed
	assertThrown(documents[params.textDocument.uri]);
	// so make sure that you don't keep references to documents when leaving scope or switching context.
}
992 
/// Helper structure for storing any data of type T on a per-file basis.
///
/// Entries are keyed by document URI and remember the document (and thus the
/// document version) they were stored for, so that data computed for an older
/// version of a document is not handed out for a newer one.
struct PerDocumentCache(T)
{
	/// A single cache entry.
	struct Entry
	{
		/// The document the data was stored for; its URI and version are used
		/// for lookups.
		Document document;
		/// The cached data.
		T data;
	}

	/// All cache entries; at most one per URI is created by $(LREF store).
	Entry[] entries;

	/// Returns the data cached for the given URI, if it is still up to date.
	///
	/// Params:
	///   source = the document manager used to look up the current version of
	///     the document. If the document is not managed, `Document.init` (and
	///     thus its default version) is compared against.
	///   uri = the URI of the document to look up.
	///
	/// Returns: the stored data if an entry for the URI exists and its
	/// document version is at least the current document version, `T.init`
	/// otherwise.
	T cached(ref TextDocumentManager source, string uri)
	{
		auto newest = source.tryGet(uri);
		// iterate by reference: avoids copying every entry just to compare URIs
		foreach (ref entry; entries)
		{
			if (entry.document.uri != uri)
				continue;

			return entry.document.version_ >= newest.version_ ? entry.data : T.init;
		}
		return T.init;
	}

	/// Stores data for the given document.
	///
	/// If an entry with the same URI already exists, it is only overwritten
	/// when the given document's version is at least the stored version; data
	/// for older versions is silently dropped. Otherwise a new entry is
	/// appended.
	void store(Document document, T data)
	{
		foreach (ref entry; entries)
		{
			if (entry.document.uri != document.uri)
				continue;

			if (document.version_ >= entry.document.version_)
			{
				entry.document = document;
				entry.data = data;
			}
			return;
		}
		entries ~= Entry(document, data);
	}
}
1035 
/// Returns a range of the identifier/word at the given position. The
/// `character` offset and the returned range are measured in UTF-16 code
/// units (`wchar`).
uint[2] wordInLine(const(char)[] line, uint character)
{
	alias impl = wordInLineImpl!(wchar, uint);
	return impl(line, character);
}
1041 
/// ditto
size_t[2] wordInLineBytes(const(char)[] line, size_t bytes)
{
	alias impl = wordInLineImpl!(char, size_t);
	return impl(line, bytes);
}
1047 
/// Shared implementation of $(LREF wordInLine) and $(LREF wordInLineBytes).
///
/// Walks over `line` code point by code point and measures offsets in code
/// units of `CharT` (`char` counts UTF-8 code units, `wchar` counts UTF-16
/// code units). Words are delimited by characters for which
/// `isDIdentifierSeparatingChar` returns true.
///
/// Params:
///   CharT = the character type whose code units offsets are measured in.
///   SizeT = the integer type used for `character` and for the result.
///   line = the line to search in.
///   character = the offset inside the line to find the word at.
///
/// Returns: `[start, end]` offsets of the word, both clamped to
/// `line.length`. A word that reaches the end of the line without a trailing
/// separator ends at the end of the line.
SizeT[2] wordInLineImpl(CharT, SizeT)(const(char)[] line, SizeT character)
out(r; r[1] >= r[0])
{
	size_t index = 0;
	SizeT offs = 0;

	// offset just behind the most recent separator seen while searching
	SizeT lastStart = 0;
	SizeT start = character, end = character;
	bool searchStart = true;
	// set once the separator terminating the word has been found
	bool foundEnd = false;

	while (index < line.length)
	{
		const c = decode(line, index);
		const l = cast(SizeT) c.codeLength!CharT;

		if (searchStart)
		{
			if (isDIdentifierSeparatingChar(c))
			{
				// a separator right at offset 0: empty range at the line start
				if (character == 0)
					break;
				lastStart = offs + l;
			}

			// reached the requested offset: the word starts behind the last
			// separator, from now on only look for its end
			if (offs + l >= character)
			{
				start = lastStart;
				searchStart = false;
			}

			offs += l;
		}
		else
		{
			end = offs;
			offs += l;
			if (isDIdentifierSeparatingChar(c))
			{
				foundEnd = true;
				break;
			}
		}
	}

	// The line ended before a separator was found, so the word extends to
	// the end of the line. Without this the last character would be cut off.
	if (!searchStart && !foundEnd)
		end = offs;

	if (start > line.length)
		start = cast(SizeT)line.length;
	if (end > line.length)
		end = cast(SizeT)line.length;

	return [start, end];
}
1096 
1097 unittest
1098 {
1099 	string a = "int i;";
1100 	string b = "a (int i;";
1101 	string c = "{int i;";
1102 	string d = "{ int i;";
1103 	assert(a.wordInLineBytes(0) == [0, 3]);
1104 	assert(a.wordInLineBytes(1) == [0, 3]);
1105 	assert(a.wordInLineBytes(2) == [0, 3]);
1106 	assert(a.wordInLineBytes(3) == [0, 3]);
1107 	assert(a.wordInLineBytes(4) == [4, 5]);
1108 	assert(a.wordInLineBytes(5) == [4, 5]);
1109 	assert(a.wordInLineBytes(6) == [6, 6]);
1110 	assert(a.wordInLineBytes(7) == [6, 6]);
1111 	assert(a.wordInLineBytes(size_t.max) == [6, 6]);
1112 
1113 	assert(b.wordInLineBytes(0) == [0, 1]);
1114 	assert(b.wordInLineBytes(1) == [0, 1]);
1115 	assert(b.wordInLineBytes(2) == [2, 2]);
1116 	assert(b.wordInLineBytes(3) == [3, 6]);
1117 	assert(b.wordInLineBytes(4) == [3, 6]);
1118 	assert(b.wordInLineBytes(5) == [3, 6]);
1119 	assert(b.wordInLineBytes(6) == [3, 6]);
1120 	assert(b.wordInLineBytes(7) == [7, 8]);
1121 	assert(b.wordInLineBytes(8) == [7, 8]);
1122 	assert(b.wordInLineBytes(9) == [9, 9]);
1123 	assert(b.wordInLineBytes(10) == [9, 9]);
1124 	assert(b.wordInLineBytes(100) == [9, 9]);
1125 	assert(b.wordInLineBytes(size_t.max) == [9, 9]);
1126 
1127 	assert(c.wordInLineBytes(0) == [0, 0]);
1128 	assert(c.wordInLineBytes(1) == [1, 4]);
1129 	assert(c.wordInLineBytes(2) == [1, 4]);
1130 	assert(c.wordInLineBytes(3) == [1, 4]);
1131 	assert(c.wordInLineBytes(4) == [1, 4]);
1132 	assert(c.wordInLineBytes(5) == [5, 6]);
1133 	assert(c.wordInLineBytes(6) == [5, 6]);
1134 	assert(c.wordInLineBytes(7) == [7, 7]);
1135 	assert(c.wordInLineBytes(8) == [7, 7]);
1136 	assert(c.wordInLineBytes(size_t.max) == [7, 7]);
1137 
1138 	assert(d.wordInLineBytes(0) == [0, 0]);
1139 	assert(d.wordInLineBytes(1) == [1, 1]);
1140 	assert(d.wordInLineBytes(2) == [2, 5]);
1141 	assert(d.wordInLineBytes(3) == [2, 5]);
1142 	assert(d.wordInLineBytes(4) == [2, 5]);
1143 	assert(d.wordInLineBytes(5) == [2, 5]);
1144 	assert(d.wordInLineBytes(6) == [6, 7]);
1145 	assert(d.wordInLineBytes(7) == [6, 7]);
1146 	assert(d.wordInLineBytes(8) == [8, 8]);
1147 	assert(d.wordInLineBytes(9) == [8, 8]);
1148 	assert(d.wordInLineBytes(size_t.max) == [8, 8]);
1149 }
1150 
1151 deprecated("use isDIdentifierSeparatingChar instead")
1152 alias isIdentifierSeparatingChar = isDIdentifierSeparatingChar;
1153 
1154 ///
1155 bool isDIdentifierSeparatingChar(dchar c)
1156 {
1157 	return c < 48 || (c > 57 && c < 65) || c == '[' || c == '\\' || c == ']'
1158 		|| c == '`' || (c > 122 && c < 128) || c == '\u2028' || c == '\u2029'; // line separators
1159 }
1160 
/// Checks whether `s` has the shape of an identifier: it must be non-empty,
/// must not start with an ASCII digit and must not contain any character for
/// which `isDIdentifierSeparatingChar` returns true.
bool isValidDIdentifier(const(char)[] s)
{
	import std.ascii : isDigit;

	if (s.length == 0)
		return false;
	if (isDigit(s[0]))
		return false;
	return !any!isDIdentifierSeparatingChar(s);
}
1168 
// Covers all three rejection reasons of isValidDIdentifier (empty input,
// leading digit, embedded separator) as well as accepted identifiers.
unittest
{
	// empty input and leading digits are rejected
	assert(!isValidDIdentifier(""));
	assert(!isValidDIdentifier("0"));
	assert(!isValidDIdentifier("10"));
	assert(!isValidDIdentifier("1a"));

	// any separating character makes the whole string invalid
	assert(!isValidDIdentifier("a b"));
	assert(!isValidDIdentifier("a.b"));
	assert(!isValidDIdentifier("foo-bar"));
	assert(!isValidDIdentifier("foo("));
	assert(!isValidDIdentifier(" a"));

	// letters, underscores and non-leading digits are accepted
	assert(isValidDIdentifier("_"));
	assert(isValidDIdentifier("a"));
	assert(isValidDIdentifier("a1"));
	assert(isValidDIdentifier("_1"));
	assert(isValidDIdentifier("__helloWorld123"));
}
1179 
// Checks that setContent and applyChange produce the expected text and that,
// after all edits, the document still uses the buffer that was reserved up
// front (the pointer captured at the start is compared at the very end).
unittest
{
	Document doc;
	// reserve 16 bytes; the longest content used below is 15 bytes long
	doc.text.reserve(16);
	auto ptr = doc.text.ptr;
	assert(doc.rawText.length == 0);
	doc.setContent("Hello world");
	assert(doc.rawText == "Hello world");
	// shrinking and growing again via setContent
	doc.setContent("foo");
	assert(doc.rawText == "foo");
	doc.setContent("foo bar baz baf");
	assert(doc.rawText == "foo bar baz baf");
	// NOTE(review): TextRange arguments appear to be (start line, start column,
	// end line, end column), judging by the results asserted below - confirm.
	// pure deletion
	doc.applyChange(TextRange(0, 4, 0, 8), "");
	assert(doc.rawText == "foo baz baf");
	// replacement shorter than the replaced range
	doc.applyChange(TextRange(0, 4, 0, 8), "bad");
	assert(doc.rawText == "foo badbaf");
	// replacement as long as the replaced range
	doc.applyChange(TextRange(0, 4, 0, 8), "bath");
	assert(doc.rawText == "foo bathaf");
	// replacement up to the end of the text
	doc.applyChange(TextRange(0, 4, 0, 10), "bath");
	assert(doc.rawText == "foo bath");
	// replacement of the whole text
	doc.applyChange(TextRange(0, 0, 0, 8), "bath");
	assert(doc.rawText == "bath");
	// replacement longer than the replaced range, at the start
	doc.applyChange(TextRange(0, 0, 0, 1), "par");
	assert(doc.rawText == "parath", doc.rawText);
	doc.applyChange(TextRange(0, 0, 0, 4), "");
	assert(doc.rawText == "th");
	// pure insertions: at the end, then at the start
	doc.applyChange(TextRange(0, 2, 0, 2), "e");
	assert(doc.rawText == "the");
	doc.applyChange(TextRange(0, 0, 0, 0), "in");
	assert(doc.rawText == "inthe");
	// none of the operations above may have moved the buffer
	assert(ptr is doc.text.ptr);
}
1212 
/// Advances a UTF-16 index and a UTF-8 index past one code point, judging only
/// by the upper four bits of `c`, the first UTF-8 byte of that code point.
///
/// Params:
///   c = first byte of the UTF-8 sequence
///   utf16Index = incremented by 2 for a 4-byte sequence, by 1 otherwise
///   utf8Index = incremented by the sequence length (1 to 4)
///
/// The encoding is assumed to be valid. Any byte that does not look like a
/// 2-, 3- or 4-byte lead (including continuation bytes) counts as one UTF-8
/// byte and one UTF-16 unit.
pragma(inline, true) private void utf16DecodeUtf8Length(A, B)(char c, ref A utf16Index,
		ref B utf8Index) @safe nothrow @nogc
{
	const lead = c & 0b1111_0000;

	if (lead == 0b1111_0000)
	{
		// 4-byte sequence: encoded as a surrogate pair in UTF-16
		utf16Index += 2;
		utf8Index += 4;
	}
	else if (lead == 0b1110_0000)
	{
		// 3-byte sequence; assume valid encoding (no wrong surrogates)
		utf16Index++;
		utf8Index += 3;
	}
	else if (lead == 0b1100_0000 || lead == 0b1101_0000)
	{
		// 2-byte sequence
		utf16Index++;
		utf8Index += 2;
	}
	else
	{
		// everything else is handled like a single byte
		utf16Index++;
		utf8Index++;
	}
}
1238 
/// Counts how many UTF-16 code units are needed to represent the UTF-8 `text`.
///
/// Every byte that is not a continuation byte (`0b10xx_xxxx`) starts a code
/// point and counts as one unit; bytes `>= 0xf0` (lead bytes of 4-byte
/// sequences) count one additional unit for the second surrogate.
/// The input is not validated.
pragma(inline, true) size_t countUTF16Length(scope const(char)[] text) @safe nothrow @nogc
{
	size_t units = 0;
	foreach (const char b; text)
	{
		const bool isContinuation = (b & 0b1100_0000) == 0b1000_0000;
		if (!isContinuation)
			units++;
		if (b >= 0xf0)
			units++;
	}
	return units;
}
1251 
/// Converts a UTF-16 code unit offset into a byte offset inside the UTF-8 `text`.
///
/// Params:
///   text = UTF-8 text, not validated
///   utf16Offset = offset in UTF-16 code units
/// Returns: the number of bytes covering `utf16Offset` UTF-16 units. The result
/// never points into the middle of a UTF-8 sequence (an offset between the two
/// surrogates of a 4-byte character resolves to the end of that character) and
/// never exceeds `text.length`.
pragma(inline, true) size_t countBytesUntilUTF16Index(scope const(char)[] text, size_t utf16Offset) @safe nothrow @nogc
{
	size_t pos = 0;
	size_t units = 0;

	// Phase 1: consume bytes until enough UTF-16 units have been counted.
	for (; units < utf16Offset && pos < text.length; pos++)
	{
		const char b = text[pos];
		if ((b & 0b1100_0000) != 0b1000_0000)
			units++; // byte starts a code point
		if (b >= 0xf0)
			units++; // 4-byte sequence: second surrogate
	}

	// Phase 2: step over the continuation bytes of the sequence we stopped in.
	while (pos < text.length && (text[pos] & 0b1100_0000) == 0b1000_0000)
		pos++;

	return pos;
}
1270 
1271 version (unittest)
1272 {
1273 	import core.time;
1274 
	// Fixture for the conversion unittests in this version block: a Document
	// created with Document.nullDocumentOwnMemory from D source text that mixes
	// ASCII with 2-, 3- and 4-byte UTF-8 sequences.
	// The expected byte/offset/position constants declared after it depend on
	// the exact bytes of this literal (tabs and the trailing space at the end of
	// the second CJK comment line included), so it must not be reformatted.
	Document testUnicodeDocument = Document.nullDocumentOwnMemory(cast(char[]) `///
/// Copyright © 2020 Somebody (not actually™) x3
///
module some.file;

enum Food : int
{
	pizza = '\U0001F355', // 🍕
	burger = '\U0001F354', // 🍔
	chicken = '\U0001F357', // 🍗
	taco = '\U0001F32E', // 🌮
	wrap = '\U0001F32F', // 🌯
	salad = '\U0001F957', // 🥗
	pasta = '\U0001F35D', // 🍝
	sushi = '\U0001F363', // 🍣
	oden = '\U0001F362', // 🍢
	egg = '\U0001F373', // 🍳
	croissant = '\U0001F950', // 🥐
	baguette = '\U0001F956', // 🥖
	popcorn = '\U0001F37F', // 🍿
	coffee = '\u2615', // ☕
	cookie = '\U0001F36A', // 🍪
}

void main() {
	// taken from https://github.com/DlangRen/Programming-in-D/blob/master/ddili/src/ders/d.cn/aa.d
	int[string] colorCodes = [ /* ... */ ];

	if ("purple" in colorCodes) {
		// ü®™🍳键 “purple” 在表中

	} else { // line 31
		//表中不存在 键 “purple” 
	}

	string x;
}`);
1312 
	// Expected coordinates of six locations inside testUnicodeDocument. Every
	// location is given three ways, and the unittest that follows checks that
	// each representation converts into the other two:
	//   *_byte     - offset in bytes of the UTF-8 text
	//   *_offset   - offset as used by bytesToOffset/offsetToBytes
	//                (presumably UTF-16 code units - confirm against Document)
	//   *_position - zero-based Position(line, character)

	// start of file
	enum testSOF_byte = 0;
	enum testSOF_offset = 0;
	enum testSOF_position = Position(0, 0);

	// end of file (after the closing brace on the last line)
	enum testEOF_byte = 872;
	enum testEOF_offset = 805;
	enum testEOF_position = Position(36, 1);

	// in line before unicode
	enum testLinePreUni_byte = 757;
	enum testLinePreUni_offset = 724;
	enum testLinePreUni_position = Position(29, 4); // after `//`

	// in line after unicode
	enum testLinePostUni_byte = 789;
	enum testLinePostUni_offset = 742;
	enum testLinePostUni_position = Position(29, 22); // after `purple” 在`

	// ascii line after unicode line
	enum testMidAsciiLine_byte = 804;
	enum testMidAsciiLine_offset = 753;
	enum testMidAsciiLine_position = Position(31, 7);

	// after unicode, end of line
	enum testEOLPostUni_byte = 795;
	enum testEOLPostUni_offset = 744;
	enum testEOLPostUni_position = Position(29, 24); // after `purple” 在表中`
1340 
	// For every named test location, converts between byte offset, offset and
	// Position in all six directions on testUnicodeDocument and compares with
	// the expected constants. Progress is printed with writeln so that a failing
	// conversion can be identified from the output.
	@("{offset, bytes, position} -> {offset, bytes, position}")
	unittest
	{
		import std.conv;
		import std.stdio;

		// `test` is the middle part of the constant names, e.g. testSOF_byte
		static foreach (test; [
				"SOF", "EOF", "LinePreUni", "LinePostUni", "MidAsciiLine", "EOLPostUni"
			])
		{
			{
				// look up the three expected representations of this location by name
				enum testOffset = mixin("test" ~ test ~ "_offset");
				enum testByte = mixin("test" ~ test ~ "_byte");
				enum testPosition = mixin("test" ~ test ~ "_position");

				writeln(" === Test ", test, " ===");

				// starting from the byte offset
				writeln(testByte, " byte -> offset ", testOffset);
				assert(testUnicodeDocument.bytesToOffset(testByte) == testOffset,
						"fail " ~ test ~ " byte->offset = " ~ testUnicodeDocument.bytesToOffset(testByte)
						.to!string);
				writeln(testByte, " byte -> position ", testPosition);
				assert(testUnicodeDocument.bytesToPosition(testByte) == testPosition,
						"fail " ~ test ~ " byte->position = " ~ testUnicodeDocument.bytesToPosition(testByte)
						.to!string);

				// starting from the offset
				writeln(testOffset, " offset -> byte ", testByte);
				assert(testUnicodeDocument.offsetToBytes(testOffset) == testByte,
						"fail " ~ test ~ " offset->byte = " ~ testUnicodeDocument.offsetToBytes(testOffset)
						.to!string);
				writeln(testOffset, " offset -> position ", testPosition);
				assert(testUnicodeDocument.offsetToPosition(testOffset) == testPosition,
						"fail " ~ test ~ " offset->position = " ~ testUnicodeDocument.offsetToPosition(testOffset)
						.to!string);

				// starting from the position
				writeln(testPosition, " position -> offset ", testOffset);
				assert(testUnicodeDocument.positionToOffset(testPosition) == testOffset,
						"fail " ~ test ~ " position->offset = " ~ testUnicodeDocument.positionToOffset(testPosition)
						.to!string);
				writeln(testPosition, " position -> byte ", testByte);
				assert(testUnicodeDocument.positionToBytes(testPosition) == testByte,
						"fail " ~ test ~ " position->byte = " ~ testUnicodeDocument.positionToBytes(testPosition)
						.to!string);

				writeln();
			}
		}

		// Inputs far beyond the document are expected to yield the end-of-file
		// coordinates in every direction.
		const size_t maxBytes = testEOF_byte;
		const size_t maxOffset = testEOF_offset;
		const Position maxPosition = testEOF_position;

		writeln("max offset -> byte");
		assert(testUnicodeDocument.offsetToBytes(size_t.max) == maxBytes);
		writeln("max offset -> position");
		assert(testUnicodeDocument.offsetToPosition(size_t.max) == maxPosition);
		writeln("max byte -> offset");
		assert(testUnicodeDocument.bytesToOffset(size_t.max) == maxOffset);
		writeln("max byte -> position");
		assert(testUnicodeDocument.bytesToPosition(size_t.max) == maxPosition);
		writeln("max position -> offset");
		assert(testUnicodeDocument.positionToOffset(Position(uint.max, uint.max)) == maxOffset);
		writeln("max position -> byte");
		assert(testUnicodeDocument.positionToBytes(Position(uint.max, uint.max)) == maxBytes);
	}
1406 
	// Checks how columns beyond the end of line 29 (the line with mixed-width
	// unicode characters) are handled.
	unittest
	{
		// in line after unicode
		// positionToBytes/positionToOffset: a column past the end of the line is
		// expected to resolve to the end of that line
		foreach (col; cast(uint[])[256, 300, int.max, uint.max])
		{
			assert(testUnicodeDocument.positionToBytes(Position(29, col)) == testEOLPostUni_byte);
			assert(testUnicodeDocument.positionToOffset(Position(29, col)) == testEOLPostUni_offset);
		}

		// lineColumnBytesToPosition: line 29 is 42 bytes long, which corresponds
		// to column 24. Byte columns past the end are expected not to be clamped:
		// every byte beyond the end of the line adds one column, so the result
		// always stays 18 below the byte column.
		assert(testUnicodeDocument.lineColumnBytesToPosition(29, 42) == Position(29, 24));
		assert(testUnicodeDocument.lineColumnBytesToPosition(29, 43) == Position(29, 25));
		assert(testUnicodeDocument.lineColumnBytesToPosition(29, 4_000_000_042) == Position(29, 4_000_000_024));
		assert(testUnicodeDocument.lineColumnBytesToPosition(29, uint.max) == Position(29, 4_294_967_277));
	}
1421 
	// Micro benchmark for the six conversion functions of Document, run against
	// testUnicodeDocument. It is compiled out through version (none); remove
	// that line to run it together with the unittests.
	version (none)
	@("character transform benchmarks")
	unittest
	{
		import std.datetime.stopwatch;
		import std.random;
		import std.stdio;

		// number of random locations converted per iteration
		enum PositionCount = 32;
		size_t[PositionCount] testBytes;
		size_t[PositionCount] testOffsets;
		Position[PositionCount] testPositions;

		// names of the benchmarked Document methods; the first letter selects
		// which of the three input arrays above is passed in
		static immutable funs = [
			"offsetToBytes", "offsetToPosition", "bytesToOffset", "bytesToPosition",
			"positionToOffset", "positionToBytes"
		];

		// all results are OR-ed into this value and printed at the end
		// (see the "tricking the optimizer" output below)
		size_t debugSum;

		size_t lengthUtf16 = testUnicodeDocument.text.codeLength!wchar;
		enum TestRepeats = 10;
		// times[function index][repeat index]
		Duration[TestRepeats][funs.length] times;

		StopWatch sw;
		static foreach (iterations; [
				1e3, 1e4, /* 1e5 */
			])
		{
			writeln("==================");
			writeln("Timing ", iterations, "x", PositionCount, "x", TestRepeats, " iterations:");
			// clear the timings of the previous iteration count
			foreach (ref row; times)
				foreach (ref col; row)
					col = Duration.zero;

			static foreach (t; 0 .. TestRepeats)
			{
				// pick fresh random offsets and derive the matching byte offsets
				// and positions for this repeat
				foreach (i, ref v; testOffsets)
				{
					v = uniform(0, lengthUtf16);
					testBytes[i] = testUnicodeDocument.offsetToBytes(v);
					testPositions[i] = testUnicodeDocument.offsetToPosition(v);
				}
				static foreach (fi, fun; funs)
				{
					sw.reset();
					sw.start();
					foreach (i; 0 .. iterations)
					{
						foreach (v; 0 .. PositionCount)
						{
							// 'b' = bytesTo..., 'o' = offsetTo..., 'p' = positionTo...
							static if (fun[0] == 'b')
								mixin("debugSum |= testUnicodeDocument." ~ fun ~ "(testBytes[v]).sumVal;");
							else static if (fun[0] == 'o')
								mixin("debugSum |= testUnicodeDocument." ~ fun ~ "(testOffsets[v]).sumVal;");
							else static if (fun[0] == 'p')
								mixin("debugSum |= testUnicodeDocument." ~ fun ~ "(testPositions[v]).sumVal;");
							else
								static assert(false);
						}
					}
					sw.stop();
					times[fi][t] = sw.peek;
				}
			}
			// one summary line per benchmarked function
			static foreach (fi, fun; funs)
			{
				writeln(fun, ": ", formatDurationDistribution(times[fi]));
			}
			writeln();
			writeln();
		}

		writeln("tricking the optimizer", debugSum);
	}
1497 
	// Benchmark helper: identity overload, so that size_t results of the
	// conversion functions can be folded into the benchmark's checksum.
	private pragma(inline, true) size_t sumVal(size_t v) pure @safe nothrow @nogc
	{
		return v;
	}

	// Benchmark helper: turns a Position into a number by reading the memory at
	// its address as one ulong and converting that to size_t.
	// NOTE(review): reads 8 bytes, so this assumes Position is at least 8 bytes
	// large (e.g. two 32-bit fields) - confirm against the Position declaration.
	private pragma(inline, true) size_t sumVal(Position v) pure @trusted nothrow @nogc
	{
		return cast(size_t)*(cast(ulong*)&v);
	}
1507 
1508 	private string formatDurationDistribution(size_t n)(Duration[n] durs)
1509 	{
1510 		import std.algorithm : fold, map, sort, sum;
1511 		import std.format : format;
1512 		import std.math : sqrt;
1513 
1514 		Duration total = durs[].fold!"a+b";
1515 		sort!"a<b"(durs[]);
1516 		double msAvg = cast(double) total.total!"hnsecs" / 10_000.0 / n;
1517 		double msMedian = cast(double) durs[$ / 2].total!"hnsecs" / 10_000.0;
1518 		double[n] diffs = 0;
1519 		foreach (i, dur; durs)
1520 			diffs[i] = (cast(double) dur.total!"hnsecs" / 10_000.0) - msAvg;
1521 		double msStdDeviation = diffs[].map!"a*a".sum.sqrt;
1522 		return format!"[avg=%.4fms, median=%.4f, sd=%.4f]"(msAvg, msMedian, msStdDeviation);
1523 	}
1524 }