served.lsp.textdocumentmanager source code

1 module served.lsp.textdocumentmanager;
2 
3 import std.algorithm;
4 import std.experimental.logger;
5 import std.json;
6 import std.string;
7 import std.utf : codeLength, decode, UseReplacementDchar;
8 
9 import served.lsp.jsonrpc;
10 import served.lsp.protocol;
11 
12 import painlessjson;
13 
/// In-memory representation of a file at any given URI. Not thread-safe.
///
/// The content is kept in a mutable UTF-8 buffer that is reused between edits.
/// LSP positions and offsets are expressed in UTF-16 code units, so most of the
/// methods here convert between byte indices, UTF-16 offsets and line/column
/// positions.
struct Document
{
	/// The URI of this document. Should not be changed.
	DocumentUri uri;
	/// The language ID as reported by the client. Should not be changed.
	string languageId;
	/// The document version as reported by the client. Should not be changed.
	long version_;
	/// UTF-8 content. Slices handed out by $(LREF rawText) alias this buffer.
	private char[] text;

	/// Returns: $(LREF languageId) if it is set, otherwise a language ID
	/// guessed from the (case-insensitively compared) file extension of the
	/// URI, or `null` if the extension is not one of the known ones.
	string getLanguageId() const @property @trusted @nogc nothrow
	{
		if (!languageId.length)
		{
			import std.path : extension;
			import std.uni : sicmp;

			const ext = uri.extension;
			if (ext.sicmp(".d") == 0)
				return "d";
			else if (ext.sicmp(".dpp") == 0)
				return "dpp";
			else if (ext.sicmp(".ds") == 0 || ext.sicmp(".dscript") == 0)
				return "dscript";
			else if (ext.sicmp(".dml") == 0)
				return "dml";
			else if (ext.sicmp(".sdl") == 0)
				return "sdl";
			else if (ext.sicmp(".dt") == 0)
				return "diet";
			else
				return null;
		}

		return languageId;
	}

	/// Creates a new D document at the given document URI, with version 0 and
	/// no text.
	this(DocumentUri uri)
	{
		this.uri = uri;
		languageId = "d";
		version_ = 0;
		text = null;
	}

	/// Creates a new document at the given document URI, with the given version
	/// and language and creates a copy of the text to use.
	this(TextDocumentItem doc)
	{
		uri = doc.uri;
		languageId = doc.languageId;
		version_ = doc.version_;
		text = doc.text.dup;
	}

	/// Creates a document with no URI and no language ID and copies the content
	/// into the text buffer using $(LREF setContent).
	static Document nullDocument(scope const(char)[] content)
	{
		Document ret;
		ret.setContent(content);
		return ret;
	}

	/// Returns: an immutable snapshot of this document that owns its own copy
	/// of the text, so later edits to this document do not affect it.
	immutable(Document) clone()
	{
		Document ret = this;
		ret.text = text.dup;
		return cast(immutable) ret;
	}

	/// Test helper: wraps `content` without copying, i.e. the document uses
	/// the passed memory directly as its text buffer.
	version (unittest) private static Document nullDocumentOwnMemory(char[] content)
	{
		Document ret;
		ret.text = content;
		return ret;
	}

	/// Returns a read-only view of the text. The text may however be changed
	/// by other operations, so this slice should be used directly and not after
	/// any context yield or API call potentially modifying the data.
	const(char)[] rawText() const
	{
		return cast(const(char)[]) text;
	}

	/// ditto
	string rawText() immutable
	{
		return text;
	}

	/// Returns: the length of the text in bytes (UTF-8 code units).
	size_t length() const @property
	{
		return text.length;
	}

	/// Sets the content of this document to the given content. Copies the data
	/// from newContent into this text buffer.
	///
	/// Should not be called as an API unless managing some kind of virtual
	/// document manually.
	void setContent(scope const(char)[] newContent)
	{
		if (newContent.length <= text.length)
		{
			// fits into the existing buffer: overwrite, then shrink
			text[0 .. newContent.length] = newContent;
			text.length = newContent.length;
		}
		else
		{
			// grow in place where the runtime allows it, then overwrite
			text = text.assumeSafeAppend;
			text.length = newContent.length;
			text = text.assumeSafeAppend;
			text[0 .. $] = newContent;
		}
	}

	/// Replaces the text inside `range` with `newContent`.
	///
	/// The range ends may be given in either order. A range covering the whole
	/// document is forwarded to $(LREF setContent). Otherwise the tail of the
	/// buffer is shifted in place and the new content is copied into the gap.
	///
	/// Params:
	///   range = start and end position (LSP line / UTF-16 column) to replace.
	///   newContent = the UTF-8 text to insert; must not alias the document text.
	void applyChange(TextRange range, scope const(char)[] newContent)
	{
		import core.stdc.string : memmove;

		auto start = positionToBytes(range[0]);
		auto end = positionToBytes(range[1]);

		if (start > end)
			swap(start, end);

		if (start == 0 && end == text.length)
		{
			setContent(newContent);
			return;
		}

		// all arithmetic in size_t: start <= end <= text.length is guaranteed
		// by positionToBytes, so none of the subtractions can underflow
		const oldLength = text.length;
		const removed = end - start;
		const tailLength = oldLength - end;
		const newLength = oldLength - removed + newContent.length;

		text = text.assumeSafeAppend;
		if (newLength > oldLength)
			text.length = newLength;

		// shift everything behind the replaced range to its new place;
		// memmove because source and destination overlap
		if (tailLength != 0 && newContent.length != removed)
			memmove(text.ptr + start + newContent.length, text.ptr + end, tailLength);

		if (newLength < oldLength)
			text = text[0 .. newLength];
		text = text.assumeSafeAppend;

		if (newContent.length != 0)
			memmove(text.ptr + start, newContent.ptr, newContent.length);
	}

	/// Converts an LSP offset to a byte offset for using for example in array
	/// slicing.
	size_t offsetToBytes(size_t offset) const
	{
		return .countBytesUntilUTF16Index(text, offset);
	}

	/// Converts a byte offset to an LSP offset. Byte offsets past the end of
	/// the text are treated as the end of the text.
	size_t bytesToOffset(size_t bytes) const
	{
		return .countUTF16Length(text[0 .. min($, bytes)]);
	}

	/// Converts a line/column position to an LSP offset. Lines and columns
	/// past the end are clamped to the end of the text / end of the line.
	size_t positionToOffset(Position position) const
	{
		size_t offset = 0;
		size_t bytes = 0;
		// skip whole lines, counting their UTF-16 length
		while (bytes < text.length && position.line > 0)
		{
			const c = text.ptr[bytes];
			if (c == '\n')
				position.line--;
			utf16DecodeUtf8Length(c, offset, bytes);
		}

		// advance inside the line, never crossing the line break
		while (bytes < text.length && position.character > 0)
		{
			const c = text.ptr[bytes];
			if (c == '\n')
				break;
			size_t utf16Size;
			utf16DecodeUtf8Length(c, utf16Size, bytes);
			if (utf16Size < position.character)
				position.character -= utf16Size;
			else
				position.character = 0;
			offset += utf16Size;
		}
		return offset;
	}

	/// Converts a line/column position to a byte offset. Lines and columns
	/// past the end are clamped to the end of the text / end of the line.
	///
	/// Returns: a byte index that is always `<= length`.
	size_t positionToBytes(Position position) const
	{
		size_t index = 0;
		while (index < text.length && position.line > 0)
			if (text.ptr[index++] == '\n')
				position.line--;

		while (index < text.length && position.character > 0)
		{
			const c = text.ptr[index];
			if (c == '\n')
				break;
			size_t utf16Size;
			utf16DecodeUtf8Length(c, utf16Size, index);
			if (utf16Size < position.character)
				position.character -= utf16Size;
			else
				position.character = 0;
		}
		// the step width is derived from the lead byte only, so a truncated
		// multi-byte sequence at the very end could step past the buffer
		return min(index, text.length);
	}

	/// Converts an LSP offset to a line/column position.
	Position offsetToPosition(size_t offset) const
	{
		size_t bytes;
		size_t index;
		// -1 (wrapping) so that `lastNl + 1` is 0 while on the first line
		size_t lastNl = -1;

		Position ret;
		while (bytes < text.length && index < offset)
		{
			const c = text.ptr[bytes];
			if (c == '\n')
			{
				ret.line++;
				lastNl = index;
			}
			utf16DecodeUtf8Length(c, index, bytes);
		}
		const start = lastNl + 1;
		ret.character = cast(uint)(index - start);
		return ret;
	}

	/// Converts a byte offset to a line/column position. Byte offsets past the
	/// end of the text are treated as the end of the text.
	Position bytesToPosition(size_t bytes) const
	{
		if (bytes > text.length)
			bytes = text.length;
		auto part = text.ptr[0 .. bytes].representation;
		// -1 (wrapping) so that `lastNl + 1` is 0 while on the first line
		size_t lastNl = -1;
		Position ret;
		foreach (i; 0 .. bytes)
		{
			if (part.ptr[i] == '\n')
			{
				ret.line++;
				lastNl = i;
			}
		}
		ret.character = cast(uint)(cast(const(char)[]) part[lastNl + 1 .. $]).countUTF16Length;
		return ret;
	}

	/// Converts a line/column byte offset to a line/column position.
	///
	/// Columns past the end of the line keep their overshoot: the number of
	/// bytes beyond the line is added unchanged to the resulting character.
	Position lineColumnBytesToPosition(uint line, uint column) const
	{
		scope lineText = lineAtScope(line);
		uint offset = 0;
		// keep over-extending positions
		if (column > lineText.length)
		{
			offset = column - cast(uint)lineText.length;
			column -= offset;
			assert(column <= lineText.length);
		}
		return Position(line, cast(uint) lineText[0 .. column].countUTF16Length + offset);
	}

	/// Returns the position at "end" starting from the given "src" position which is assumed to be at byte "start"
	/// Faster to quickly calculate nearby positions of known byte positions.
	/// Falls back to $(LREF bytesToPosition) if end is before start.
	Position movePositionBytes(Position src, size_t start, size_t end) const
	{
		if (end == start)
			return src;
		if (end < start)
			return bytesToPosition(end);

		auto t = text[min($, start) .. min($, end)];
		size_t bytes;
		while (bytes < t.length)
		{
			const c = t.ptr[bytes];
			if (c == '\n')
			{
				src.line++;
				src.character = 0;
				bytes++;
			}
			else
				utf16DecodeUtf8Length(c, src.character, bytes);
		}
		return src;
	}

	/// Same as $(LREF movePositionBytes), but additionally stores the result
	/// in `src` and sets `start` to `end`, so consecutive calls can walk
	/// forward through the document.
	Position nextPositionBytes(ref Position src, ref size_t start, size_t end) const
	{
		auto pos = movePositionBytes(src, start, end);
		src = pos;
		start = end;
		return pos;
	}

	/// Returns the word range at a given line/column position.
	TextRange wordRangeAt(Position position) const
	{
		auto chars = wordInLine(lineAtScope(position), position.character);
		return TextRange(Position(position.line, chars[0]), Position(position.line, chars[1]));
	}

	/// Returns the word range at a given byte position. Byte positions past
	/// the end of the text are treated as the end of the text.
	size_t[2] wordRangeAt(size_t bytes) const
	{
		if (bytes > text.length)
			bytes = text.length;
		// lastIndexOf yields -1 when there is no earlier line break => start 0
		const lineStart = cast(size_t)(text.lastIndexOf('\n', bytes) + 1);
		auto ret = wordInLineBytes(text[lineStart .. $], bytes - lineStart);
		ret[0] += lineStart;
		ret[1] += lineStart;
		return ret;
	}

	/// Returns a byte offset range as `[start, end]` of the given 0-based line
	/// number. The range includes the terminating `'\n'` if the line has one.
	/// Returns `[0, 0]` if the document has fewer lines.
	size_t[2] lineByteRangeAt(uint line) const
	{
		size_t start = 0;
		size_t index = 0;
		while (line > 0 && index < text.length)
		{
			const c = text.ptr[index++];
			if (c == '\n')
			{
				line--;
				start = index;
			}
		}
		// if !found
		if (line != 0)
			return [0, 0];

		auto end = text.indexOf('\n', start);
		if (end == -1)
			end = text.length;
		else
			end++;

		return [start, end];
	}

	/// Returns the text of a line at the given position.
	string lineAt(Position position) const
	{
		return lineAt(position.line);
	}

	/// Returns the text of a line starting at line 0.
	string lineAt(uint line) const
	{
		return lineAtScope(line).idup;
	}

	/// Returns the line text which is only in this scope if text isn't modified
	/// See_Also: $(LREF lineAt)
	scope const(char)[] lineAtScope(Position position) const
	{
		return lineAtScope(position.line);
	}

	/// Returns the line text which is only in this scope if text isn't modified
	/// See_Also: $(LREF lineAt)
	scope const(char)[] lineAtScope(uint line) const
	{
		auto range = lineByteRangeAt(line);
		return text[range[0] .. range[1]];
	}

	unittest
	{
		void assertEqual(A, B)(A a, B b)
		{
			import std.conv : to;

			assert(a == b, a.to!string ~ " is not equal to " ~ b.to!string);
		}

		Document doc;
		doc.setContent(`abc
hellö world
how åre
you?`);
		assertEqual(doc.lineAt(Position(0, 0)), "abc\n");
		assertEqual(doc.lineAt(Position(0, 100)), "abc\n");
		assertEqual(doc.lineAt(Position(1, 3)), "hellö world\n");
		assertEqual(doc.lineAt(Position(2, 0)), "how åre\n");
		assertEqual(doc.lineAt(Position(3, 0)), "you?");
		assertEqual(doc.lineAt(Position(3, 8)), "you?");
		assertEqual(doc.lineAt(Position(4, 0)), "");
	}

	/// Returns how a line is terminated at the given 0-based line number.
	///
	/// Only `"\n"` and `"\r\n"` are distinguished; lines without a line break
	/// (or line numbers outside the document) report `EolType.lf`.
	EolType eolAt(int line) const
	{
		size_t index = 0;
		int curLine = 0;
		bool prevWasCr = false;
		while (index < text.length)
		{
			// only reachable for negative line numbers
			if (curLine > line)
				return EolType.lf;
			auto c = decode!(UseReplacementDchar.yes)(text, index);
			if (c == '\n')
			{
				if (curLine == line)
				{
					return prevWasCr ? EolType.crlf : EolType.lf;
				}
				curLine++;
			}
			prevWasCr = c == '\r';
		}
		return EolType.lf;
	}
}
451 
/// Helper struct which should have one unique instance in the application which
/// processes document events sent by a LSP client to an LSP server and creates
/// an in-memory representation of all the files managed by the client.
struct TextDocumentManager
{
	/// Internal document storage. Only iterate over this using `foreach`, other
	/// operations are not considered officially supported.
	Document[] documentStore;

	/// Returns: the index of the first stored document with the given URI or
	/// `-1` if there is none.
	private ptrdiff_t indexOfUri(scope const(char)[] uri)
	{
		return documentStore.countUntil!(a => a.uri == uri);
	}

	/// Same as $(LREF tryGet) but throws an exception if the URI doesn't exist.
	ref Document opIndex(string uri)
	{
		auto idx = indexOfUri(uri);
		if (idx == -1)
			throw new Exception("Document '" ~ uri ~ "' not found");
		return documentStore[idx];
	}

	/// Tries to get a document from a URI, returns Document.init if it is not
	/// in the in-memory cache / not sent by the client.
	Document tryGet(string uri)
	{
		auto idx = indexOfUri(uri);
		if (idx == -1)
			return Document.init;
		return documentStore[idx];
	}

	/// Tries to load a given URI manually without having it received via LSP
	/// methods. Note that a LSP close method will unload this early.
	///
	/// The document is always appended with version `-1`; use
	/// $(LREF getOrFromFilesystem) to avoid loading a URI that is already
	/// stored.
	/// Returns: the created document
	/// Throws: FileException in case the file doesn't exist or other file
	///         system errors. In this case no new document should have been
	///         inserted yet.
	ref Document loadFromFilesystem(string uri)
	{
		import served.lsp.uri : uriToFile;
		import fs = std.file;

		string path = uriToFile(uri);
		// read first, so nothing is inserted if reading throws
		auto content = fs.readText(path);

		auto index = documentStore.length++;
		documentStore[index].uri = uri;
		documentStore[index].version_ = -1;
		documentStore[index].setContent(content);
		return documentStore[index];
	}

	/// Gets the stored document for a URI or, if it is not in the in-memory
	/// cache, loads it using $(LREF loadFromFilesystem).
	/// Throws: FileException in case the file doesn't exist or other file
	///         system errors. In this case no new document should have been
	///         inserted yet.
	ref Document getOrFromFilesystem(string uri)
	{
		auto idx = indexOfUri(uri);
		if (idx == -1)
			return loadFromFilesystem(uri);
		else
			return documentStore[idx];
	}

	/// Unloads the given URI so it's no longer accessible. Note that this
	/// should only be done for documents loaded manually and never for LSP
	/// documents as it will break all features in that file until reopened.
	/// Returns: `true` if a document was removed, `false` if the URI was not
	/// stored.
	bool unloadDocument(string uri)
	{
		auto idx = indexOfUri(uri);
		if (idx == -1)
			return false;

		// swap-remove; order of the store is not preserved
		documentStore[idx] = documentStore[$ - 1];
		// drop the reference to the text buffer from the slot that is cut off,
		// otherwise the array memory keeps the removed text alive
		documentStore[$ - 1] = Document.init;
		documentStore.length--;
		return true;
	}

	/// Returns the currently preferred syncKind to use with the client.
	/// Additionally always supports the `full` sync kind.
	static TextDocumentSyncKind syncKind()
	{
		return TextDocumentSyncKind.incremental;
	}

	/// Processes an LSP packet and performs the document update in-memory that
	/// is requested.
	/// Params:
	///   msg = The request sent by a client. This method only processes
	///     `textDocument/` messages which are relevant to file modification.
	/// Returns: `true` if the given method was handled, `false` otherwise.
	bool process(RequestMessage msg)
	{
		if (msg.method == "textDocument/didOpen")
		{
			auto params = msg.params.fromJSON!DidOpenTextDocumentParams;
			auto opened = Document(params.textDocument);
			// The URI may already be stored (for example loaded through
			// loadFromFilesystem). Appending would leave the stale copy in
			// front, which is the one every lookup finds, so replace it.
			auto idx = indexOfUri(opened.uri);
			if (idx >= 0)
				documentStore[idx] = opened;
			else
				documentStore ~= opened;
			return true;
		}
		else if (msg.method == "textDocument/didClose")
		{
			auto targetUri = msg.params["textDocument"]["uri"].str;
			if (!unloadDocument(targetUri))
			{
				warning("Received didClose notification for URI not in system: ", targetUri);
				warning("This can be a potential memory leak if it was previously opened under a different name.");
			}
			return true;
		}
		else if (msg.method == "textDocument/didChange")
		{
			auto targetUri = msg.params["textDocument"]["uri"].str;
			auto idx = indexOfUri(targetUri);
			// changes for unknown documents are silently ignored
			if (idx >= 0)
			{
				documentStore[idx].version_ = msg.params["textDocument"]["version"].integer;
				foreach (change; msg.params["contentChanges"].array)
				{
					if (auto rangePtr = "range" in change)
					{
						// incremental change: replace only the given range
						auto range = *rangePtr;
						TextRange textRange = cast(Position[2])[
							range["start"].fromJSON!Position, range["end"].fromJSON!Position
						];
						documentStore[idx].applyChange(textRange, change["text"].str);
					}
					else
						// no range: the change carries the full new content
						documentStore[idx].setContent(change["text"].str);
				}
			}
			return true;
		}
		return false;
	}
}
586 
/// Helper structure for storing any data of type T on a per-file basis.
struct PerDocumentCache(T)
{
	/// One cached value together with the document it was stored for.
	struct Entry
	{
		Document document;
		T data;
	}

	/// All cached entries, searched linearly by URI.
	Entry[] entries;

	/// Looks up the cached value for `uri`.
	///
	/// Returns: the stored data if the first entry for `uri` has a version
	/// that is not older than the version `source` currently holds for that
	/// URI, otherwise `T.init`.
	T cached(ref TextDocumentManager source, string uri)
	{
		const currentVersion = source.tryGet(uri).version_;
		for (size_t i = 0; i < entries.length; i++)
		{
			if (entries[i].document.uri != uri)
				continue;

			// only the first entry for a URI is ever consulted
			if (entries[i].document.version_ >= currentVersion)
				return entries[i].data;
			return T.init;
		}
		return T.init;
	}

	/// Stores `data` for `document`. An existing entry for the same URI is
	/// overwritten unless it belongs to a newer document version, in which
	/// case the call does nothing.
	void store(Document document, T data)
	{
		foreach (ref slot; entries)
		{
			if (slot.document.uri != document.uri)
				continue;

			const isOutdated = document.version_ < slot.document.version_;
			if (!isOutdated)
			{
				slot.document = document;
				slot.data = data;
			}
			return;
		}
		entries ~= Entry(document, data);
	}
}
629 
/// Returns a range of the identifier/word at the given position.
///
/// Params:
///   line = the UTF-8 text to search in.
///   character = cursor column inside `line`, in UTF-16 code units.
/// Returns: `[start, end]` (end exclusive) in UTF-16 code units.
uint[2] wordInLine(const(char)[] line, uint character)
{
	return wordInLineImpl!(wchar, uint)(line, character);
}

/// ditto
/// This overload takes and returns byte (UTF-8 code unit) indices instead.
size_t[2] wordInLineBytes(const(char)[] line, size_t bytes)
{
	return wordInLineImpl!(char, size_t)(line, bytes);
}

/// Shared implementation of $(LREF wordInLine) and $(LREF wordInLineBytes).
///
/// Walks over `line` once. While the cursor has not been reached, it remembers
/// the index right behind the most recent separating character, which becomes
/// the start of the word. After that it continues up to the next separating
/// character (or the end of `line`), which becomes the exclusive end.
///
/// Params:
///   CharT = code unit type in which `character` and the result are counted.
///   SizeT = integer type used for `character` and the result.
///   line = the UTF-8 text to search in.
///   character = cursor index in `CharT` code units.
/// Returns: `[start, end]` in `CharT` code units, always within the line. A
/// cursor behind the end of the line yields an empty range at the line end.
/// Throws: UTFException if `line` is not valid UTF-8.
SizeT[2] wordInLineImpl(CharT, SizeT)(const(char)[] line, SizeT character)
{
	size_t index = 0;
	SizeT offs = 0;

	// a word that is not preceded by any separator starts at the line start
	SizeT lastStart = 0;
	SizeT start = character, end = character;
	bool searchStart = true;
	bool hitSeparator = false;

	while (index < line.length)
	{
		const c = decode(line, index);
		const l = cast(SizeT) c.codeLength!CharT;

		if (searchStart)
		{
			if (isDIdentifierSeparatingChar(c))
				lastStart = cast(SizeT)(offs + l);

			if (offs + l >= character)
			{
				start = lastStart;
				searchStart = false;
			}

			offs += l;
		}
		else
		{
			end = offs;
			offs += l;
			if (isDIdentifierSeparatingChar(c))
			{
				hitSeparator = true;
				break;
			}
		}
	}

	if (!hitSeparator)
	{
		// The whole line was consumed, so offs is now the line length in
		// CharT units and the word (if any) extends up to there.
		end = offs;
		if (searchStart)
			start = offs; // cursor was behind the end of the line
	}
	if (end < start)
		end = start;

	return [start, end];
}

deprecated("use isDIdentifierSeparatingChar instead")
alias isIdentifierSeparatingChar = isDIdentifierSeparatingChar;

/// Returns: `true` if `c` can not be part of a D identifier and therefore
/// terminates a word: every ASCII character that is not a letter, a digit or
/// `_`, plus the Unicode line and paragraph separators. All other non-ASCII
/// characters are treated as identifier characters.
bool isDIdentifierSeparatingChar(dchar c)
{
	return c < 48 || (c > 57 && c < 65) || c == '[' || c == '\\' || c == ']'
		|| c == '^' || c == '`' || (c > 122 && c < 128)
		|| c == '\u2028' || c == '\u2029'; // line separators
}
697 
/// Returns: `true` if `s` is not empty, does not start with an ASCII digit and
/// contains no character for which $(LREF isDIdentifierSeparatingChar) is true.
bool isValidDIdentifier(const(char)[] s)
{
	import std.ascii : isDigit;

	if (s.length == 0)
		return false;
	if (isDigit(s[0]))
		return false;

	const hasSeparator = s.any!isDIdentifierSeparatingChar;
	return !hasSeparator;
}
705 
// isValidDIdentifier: rejects empty strings and leading digits, accepts
// underscores, letters and trailing digits.
unittest
{
	static immutable string[] rejected = ["", "0", "10", "1a"];
	static immutable string[] accepted = ["_", "a", "__helloWorld123"];

	foreach (candidate; rejected)
		assert(!isValidDIdentifier(candidate));
	foreach (candidate; accepted)
		assert(isValidDIdentifier(candidate));
}
716 
// Document.setContent / Document.applyChange: a sequence of edits on a single
// line, which must all happen inside the initially reserved buffer.
unittest
{
	Document doc;
	doc.text.reserve(16);
	auto ptr = doc.text.ptr;
	assert(doc.rawText.length == 0);

	// full replacements
	foreach (content; ["Hello world", "foo", "foo bar baz baf"])
	{
		doc.setContent(content);
		assert(doc.rawText == content);
	}

	// replaces columns [from, to) of line 0 and checks the resulting text
	void edit(uint from, uint to, string replacement, string expected)
	{
		doc.applyChange(TextRange(0, from, 0, to), replacement);
		assert(doc.rawText == expected);
	}

	edit(4, 8, "", "foo baz baf");
	edit(4, 8, "bad", "foo badbaf");
	edit(4, 8, "bath", "foo bathaf");
	edit(4, 10, "bath", "foo bath");
	edit(0, 8, "bath", "bath");

	doc.applyChange(TextRange(0, 0, 0, 1), "par");
	assert(doc.rawText == "parath", doc.rawText);

	edit(0, 4, "", "th");
	edit(2, 2, "e", "the");
	edit(0, 0, "in", "inthe");

	// no edit may have reallocated the buffer
	assert(ptr is doc.text.ptr);
}
749 
/// Advances both counters over one code point, judged solely by its UTF-8 lead
/// byte `c`: `utf8Index` grows by the byte length that lead byte announces and
/// `utf16Index` by the number of UTF-16 code units the code point takes.
/// Any byte that is not a multi-byte lead byte counts as one unit in both.
/// The input is assumed to be validly encoded; nothing is verified here.
pragma(inline, true) private void utf16DecodeUtf8Length(A, B)(char c, ref A utf16Index,
		ref B utf8Index) @safe nothrow @nogc
{
	const highNibble = c >> 4;

	if (highNibble == 0b1111)
	{
		// 4 byte sequence => surrogate pair in UTF-16
		utf16Index += 2;
		utf8Index += 4;
	}
	else if (highNibble == 0b1110)
	{
		// 3 byte sequence; assume valid encoding (no wrong surrogates)
		utf16Index++;
		utf8Index += 3;
	}
	else if (highNibble == 0b1100 || highNibble == 0b1101)
	{
		// 2 byte sequence
		utf16Index++;
		utf8Index += 2;
	}
	else
	{
		utf16Index++;
		utf8Index++;
	}
}
775 
/// Returns: the number of UTF-16 code units needed for the UTF-8 `text`.
/// Every byte that is not a continuation byte (`0b10xx_xxxx`) counts once and
/// lead bytes of 4 byte sequences (`>= 0xF0`) count a second time.
pragma(inline, true) size_t countUTF16Length(scope const(char)[] text) @safe nothrow @nogc
{
	size_t units = 0;
	foreach (char b; text)
	{
		const isContinuation = (b & 0xC0) == 0x80;
		if (!isContinuation)
			units++;
		if (b >= 0xF0)
			units++;
	}
	return units;
}
788 
/// Returns: the byte index in the UTF-8 `text` at which `utf16Offset` UTF-16
/// code units have been passed, or `text.length` if the text is shorter.
/// The result never points at a continuation byte: trailing continuation
/// bytes of the last counted code point are skipped as well.
pragma(inline, true) size_t countBytesUntilUTF16Index(scope const(char)[] text, size_t utf16Offset) @safe nothrow @nogc
{
	size_t pos = 0;

	// consume bytes until enough UTF-16 units were seen
	for (size_t units = 0; units < utf16Offset && pos < text.length; pos++)
	{
		const b = text[pos];
		if ((b & 0xC0) != 0x80)
			units++;
		if (b >= 0xF0)
			units++;
	}

	// finish the code point the loop stopped in
	while (pos < text.length && (text[pos] & 0xC0) == 0x80)
		pos++;

	return pos;
}
807 
808 version (unittest)
809 {
810 	import core.time;
811 
	// Fixture for the conversion tests below: a small D file mixing ASCII with
	// 2, 3 and 4 byte UTF-8 sequences. The string literal is used as the text
	// buffer without copying, so tests must only read from this document.
	// The `test<Name>_byte`, `test<Name>_offset` and `test<Name>_position`
	// enums below describe the same location in this text as byte index,
	// UTF-16 offset and line/column position; they depend on the exact bytes
	// of the literal (tabs and trailing spaces included).
	Document testUnicodeDocument = Document.nullDocumentOwnMemory(cast(char[]) `///
/// Copyright © 2020 Somebody (not actually™) x3
///
module some.file;

enum Food : int
{
	pizza = '\U0001F355', // 🍕
	burger = '\U0001F354', // 🍔
	chicken = '\U0001F357', // 🍗
	taco = '\U0001F32E', // 🌮
	wrap = '\U0001F32F', // 🌯
	salad = '\U0001F957', // 🥗
	pasta = '\U0001F35D', // 🍝
	sushi = '\U0001F363', // 🍣
	oden = '\U0001F362', // 🍢
	egg = '\U0001F373', // 🍳
	croissant = '\U0001F950', // 🥐
	baguette = '\U0001F956', // 🥖
	popcorn = '\U0001F37F', // 🍿
	coffee = '\u2615', // ☕
	cookie = '\U0001F36A', // 🍪
}

void main() {
	// taken from https://github.com/DlangRen/Programming-in-D/blob/master/ddili/src/ders/d.cn/aa.d
	int[string] colorCodes = [ /* ... */ ];

	if ("purple" in colorCodes) {
		// ü®™🍳键 “purple” 在表中

	} else { // line 31
		//表中不存在 键 “purple” 
	}

	string x;
}`);

	// start of file
	enum testSOF_byte = 0;
	enum testSOF_offset = 0;
	enum testSOF_position = Position(0, 0);

	// end of file
	enum testEOF_byte = 872;
	enum testEOF_offset = 805;
	enum testEOF_position = Position(36, 1);

	// in line before unicode
	enum testLinePreUni_byte = 757;
	enum testLinePreUni_offset = 724;
	enum testLinePreUni_position = Position(29, 4); // after `//`

	// in line after unicode
	enum testLinePostUni_byte = 789;
	enum testLinePostUni_offset = 742;
	enum testLinePostUni_position = Position(29, 22); // after `purple” 在`

	// ascii line after unicode line
	enum testMidAsciiLine_byte = 804;
	enum testMidAsciiLine_offset = 753;
	enum testMidAsciiLine_position = Position(31, 7);
872 
873 	@("{offset, bytes, position} -> {offset, bytes, position}")
874 	unittest
875 	{
876 		import std.conv;
877 		import std.stdio;
878 
879 		static foreach (test; [
880 				"SOF", "EOF", "LinePreUni", "LinePostUni", "MidAsciiLine"
881 			])
882 		{
883 			{
884 				enum testOffset = mixin("test" ~ test ~ "_offset");
885 				enum testByte = mixin("test" ~ test ~ "_byte");
886 				enum testPosition = mixin("test" ~ test ~ "_position");
887 
888 				writeln(" === Test ", test, " ===");
889 
890 				writeln(testByte, " byte -> offset ", testOffset);
891 				assert(testUnicodeDocument.bytesToOffset(testByte) == testOffset,
892 						"fail " ~ test ~ " byte->offset = " ~ testUnicodeDocument.bytesToOffset(testByte)
893 						.to!string);
894 				writeln(testByte, " byte -> position ", testPosition);
895 				assert(testUnicodeDocument.bytesToPosition(testByte) == testPosition,
896 						"fail " ~ test ~ " byte->position = " ~ testUnicodeDocument.bytesToPosition(testByte)
897 						.to!string);
898 
899 				writeln(testOffset, " offset -> byte ", testByte);
900 				assert(testUnicodeDocument.offsetToBytes(testOffset) == testByte,
901 						"fail " ~ test ~ " offset->byte = " ~ testUnicodeDocument.offsetToBytes(testOffset)
902 						.to!string);
903 				writeln(testOffset, " offset -> position ", testPosition);
904 				assert(testUnicodeDocument.offsetToPosition(testOffset) == testPosition,
905 						"fail " ~ test ~ " offset->position = " ~ testUnicodeDocument.offsetToPosition(testOffset)
906 						.to!string);
907 
908 				writeln(testPosition, " position -> offset ", testOffset);
909 				assert(testUnicodeDocument.positionToOffset(testPosition) == testOffset,
910 						"fail " ~ test ~ " position->offset = " ~ testUnicodeDocument.positionToOffset(testPosition)
911 						.to!string);
912 				writeln(testPosition, " position -> byte ", testByte);
913 				assert(testUnicodeDocument.positionToBytes(testPosition) == testByte,
914 						"fail " ~ test ~ " position->byte = " ~ testUnicodeDocument.positionToBytes(testPosition)
915 						.to!string);
916 
917 				writeln();
918 			}
919 		}
920 
921 		const size_t maxBytes = testEOF_byte;
922 		const size_t maxOffset = testEOF_offset;
923 		const Position maxPosition = testEOF_position;
924 
925 		writeln("max offset -> byte");
926 		assert(testUnicodeDocument.offsetToBytes(size_t.max) == maxBytes);
927 		writeln("max offset -> position");
928 		assert(testUnicodeDocument.offsetToPosition(size_t.max) == maxPosition);
929 		writeln("max byte -> offset");
930 		assert(testUnicodeDocument.bytesToOffset(size_t.max) == maxOffset);
931 		writeln("max byte -> position");
932 		assert(testUnicodeDocument.bytesToPosition(size_t.max) == maxPosition);
933 		writeln("max position -> offset");
934 		assert(testUnicodeDocument.positionToOffset(Position(uint.max, uint.max)) == maxOffset);
935 		writeln("max position -> byte");
936 		assert(testUnicodeDocument.positionToBytes(Position(uint.max, uint.max)) == maxBytes);
937 	}
938 
	// Micro benchmark for the six conversion functions of Document, run on
	// testUnicodeDocument. Disabled through `version (none)`, so this code is
	// not compiled; remove that line to run it manually.
	version (none)
	@("character transform benchmarks")
	unittest
	{
		import std.datetime.stopwatch;
		import std.random;
		import std.stdio;

		// number of random locations converted per timed iteration
		enum PositionCount = 32;
		size_t[PositionCount] testBytes;
		size_t[PositionCount] testOffsets;
		Position[PositionCount] testPositions;

		// The first letter of each name selects which of the input arrays
		// above is passed to the function (b = bytes, o = offsets,
		// p = positions).
		static immutable funs = [
			"offsetToBytes", "offsetToPosition", "bytesToOffset", "bytesToPosition",
			"positionToOffset", "positionToBytes"
		];

		// all results are folded into this value and printed at the end
		size_t debugSum;

		size_t lengthUtf16 = testUnicodeDocument.text.codeLength!wchar;
		enum TestRepeats = 10;
		// measured time per function and repeat
		Duration[TestRepeats][funs.length] times;

		StopWatch sw;
		static foreach (iterations; [
				1e3, 1e4, /* 1e5 */
			])
		{
			writeln("==================");
			writeln("Timing ", iterations, "x", PositionCount, "x", TestRepeats, " iterations:");
			foreach (ref row; times)
				foreach (ref col; row)
					col = Duration.zero;

			static foreach (t; 0 .. TestRepeats)
			{
				// new random locations for every repeat, kept in sync across
				// the three representations
				foreach (i, ref v; testOffsets)
				{
					v = uniform(0, lengthUtf16);
					testBytes[i] = testUnicodeDocument.offsetToBytes(v);
					testPositions[i] = testUnicodeDocument.offsetToPosition(v);
				}
				static foreach (fi, fun; funs)
				{
					sw.reset();
					sw.start();
					foreach (i; 0 .. iterations)
					{
						foreach (v; 0 .. PositionCount)
						{
							static if (fun[0] == 'b')
								mixin("debugSum |= testUnicodeDocument." ~ fun ~ "(testBytes[v]).sumVal;");
							else static if (fun[0] == 'o')
								mixin("debugSum |= testUnicodeDocument." ~ fun ~ "(testOffsets[v]).sumVal;");
							else static if (fun[0] == 'p')
								mixin("debugSum |= testUnicodeDocument." ~ fun ~ "(testPositions[v]).sumVal;");
							else
								static assert(false);
						}
					}
					sw.stop();
					times[fi][t] = sw.peek;
				}
			}
			static foreach (fi, fun; funs)
			{
				writeln(fun, ": ", formatDurationDistribution(times[fi]));
			}
			writeln();
			writeln();
		}

		writeln("tricking the optimizer", debugSum);
	}
1014 
1015 	private pragma(inline, true) size_t sumVal(size_t v) pure @safe nothrow @nogc
1016 	{
1017 		return v;
1018 	}
1019 
1020 	private pragma(inline, true) size_t sumVal(Position v) pure @trusted nothrow @nogc
1021 	{
1022 		return cast(size_t)*(cast(ulong*)&v);
1023 	}
1024 
1025 	private string formatDurationDistribution(size_t n)(Duration[n] durs)
1026 	{
1027 		import std.algorithm : fold, map, sort, sum;
1028 		import std.format : format;
1029 		import std.math : sqrt;
1030 
1031 		Duration total = durs[].fold!"a+b";
1032 		sort!"a<b"(durs[]);
1033 		double msAvg = cast(double) total.total!"hnsecs" / 10_000.0 / n;
1034 		double msMedian = cast(double) durs[$ / 2].total!"hnsecs" / 10_000.0;
1035 		double[n] diffs = 0;
1036 		foreach (i, dur; durs)
1037 			diffs[i] = (cast(double) dur.total!"hnsecs" / 10_000.0) - msAvg;
1038 		double msStdDeviation = diffs[].map!"a*a".sum.sqrt;
1039 		return format!"[avg=%.4fms, median=%.4f, sd=%.4f]"(msAvg, msMedian, msStdDeviation);
1040 	}
1041 }