module served.lsp.textdocumentmanager;

import std.algorithm;
import std.experimental.logger;
import std.json;
import std.string;
import std.utf : codeLength, decode, UseReplacementDchar;

import served.lsp.jsonrpc;
import served.lsp.protocol;

/// in-memory representation of a file at any given URI. Not thread-safe.
struct Document
{
	/// The URI of this document. Should not be changed.
	DocumentUri uri;
	/// The language ID as reported by the client. Should not be changed.
	string languageId;
	/// The document version as reported by the client. Should not be changed.
	long version_;
	private char[] text;

	/// Returns the language ID or guesses it given the filename's extension.
	/// Returns null if none is set and can't be guessed.
	///
	/// The extension is compared case-insensitively.
	///
	/// Guessing Map:
	/// * `.d|.di` = `"d"`
	/// * `.dpp` = `"dpp"`
	/// * `.c` = `"c"`
	/// * `.cpp` = `"cpp"`
	/// * `.ds|.dscript` = `"dscript"`
	/// * `.dml` = `"dml"`
	/// * `.sdl` = `"sdl"`
	/// * `.dt` = `"diet"`
	/// * `.json` = `"json"`
	string getLanguageId() const @property @trusted @nogc nothrow
	{
		import std.path : extension;
		import std.uni : sicmp;

		// an explicitly set language ID always wins over guessing
		if (languageId.length)
			return languageId;

		// [extension, language ID] pairs, see the guessing map above
		static immutable string[2][] knownExtensions = [
			[".d", "d"],
			[".di", "d"],
			[".dpp", "dpp"],
			[".c", "c"],
			[".cpp", "cpp"],
			[".ds", "dscript"],
			[".dscript", "dscript"],
			[".dml", "dml"],
			[".sdl", "sdl"],
			[".dt", "diet"],
			[".json", "json"],
		];

		const ext = uri.extension;
		foreach (ref pair; knownExtensions)
			if (ext.sicmp(pair[0]) == 0)
				return pair[1];
		return null;
	}

	///
	unittest
	{
		Document d;
		assert(d.getLanguageId == null);
		d.uri = "file:///home/project/app.d";
		assert(d.getLanguageId == "d");
		d.languageId = "cpp";
		assert(d.getLanguageId == "cpp");
	}

	/// Creates a new document at the given document URI, with version 0 and
	/// no text and guessed language ID. See $(LREF getLanguageId)
	this(DocumentUri uri)
	{
		version_ = 0;
		text = null;
		this.uri = uri;
		// languageId is still empty here, so this guesses from the URI
		languageId = getLanguageId;
	}

	///
	unittest
	{
		auto doc = Document("file:///home/projects/app.d");
		assert(doc.uri == "file:///home/projects/app.d");
		assert(doc.languageId == "d");
		assert(doc.version_ == 0);
		assert(!doc.rawText.length);
	}

	/// Creates a new document at the given document URI, with the given version
	/// and language and creates a copy of the text to use.
	this(TextDocumentItem doc)
	{
		text = doc.text.dup;
		version_ = doc.version_;
		languageId = doc.languageId;
		uri = doc.uri;
	}

	///
	unittest
	{
		// e.g. received from LSP client
		TextDocumentItem item = {
			uri: "file:///home/projects/app.c",
			languageId: "cpp",
			version_: 0,
			text: "#include <stdio>",
		};
		auto doc = Document(item);
		assert(doc.length == "#include <stdio>".length);
	}

	/// Creates a document with no URI and no language ID and copies the content
	/// into the text buffer using $(LREF setContent).
	static Document nullDocument(scope const(char)[] content)
	{
		Document created;
		created.setContent(content);
		return created;
	}

	///
	unittest
	{
		auto doc = Document.nullDocument(`import std.stdio;`);
		assert(!doc.languageId.length);
		assert(doc.version_ == 0);
		assert(!doc.uri.length);
		assert(doc.rawText == "import std.stdio;");
	}

	/// Returns a copy of this document with the text memory duplicated.
	/// May safely be cast to immutable.
145 Document clone() const 146 { 147 Document ret; 148 ret.uri = uri; 149 ret.version_ = version_; 150 ret.languageId = languageId; 151 ret.text = text.dup; 152 return ret; 153 } 154 155 version (unittest) private static Document nullDocumentOwnMemory(char[] content) 156 { 157 Document ret; 158 ret.text = content; 159 return ret; 160 } 161 162 /// Returns a read-only view of the text. The text may however be changed 163 /// by other operations, so this slice should be used directly and not after 164 /// any context yield or API call potentially modifying the data. 165 /// 166 /// If used on an immutable Document, the text cannot be changed and thus 167 /// returns a full string instead of a const(char)[] slice. 168 const(char)[] rawText() const 169 { 170 return text; 171 } 172 173 /// ditto 174 string rawText() immutable 175 { 176 return text; 177 } 178 179 /// Returns the text length. 180 size_t length() const @property 181 { 182 return text.length; 183 } 184 185 /// Sets the content of this document to the given content. Copies the data 186 /// from newContent into this text buffer. 187 /// 188 /// Should not be called as an API unless managing some kind of virtual 189 /// document manually. 190 ref typeof(this) setContent(scope const(char)[] newContent) return 191 { 192 if (newContent.length < text.length) 193 { 194 text.ptr[0 .. newContent.length] = newContent; 195 text.ptr[newContent.length] = '\0'; // insert null byte to find corruptions 196 text.length = newContent.length; 197 text = text.assumeSafeAppend; 198 } 199 else 200 { 201 text = text.assumeSafeAppend; 202 text.length = newContent.length; 203 text[0 .. 
$] = newContent; 204 } 205 return this; 206 } 207 208 /// 209 void applyChange(TextRange range, scope const(char)[] newContent) 210 { 211 auto start = positionToBytes(range[0]); 212 auto end = positionToBytes(range[1]); 213 214 if (start > end) 215 swap(start, end); 216 217 if (start == 0 && end == text.length) 218 { 219 setContent(newContent); 220 return; 221 } 222 223 auto addition = newContent.representation; 224 int removed = cast(int) end - cast(int) start; 225 int added = cast(int) addition.length - removed; 226 text = text.assumeSafeAppend; 227 if (added > 0) 228 { 229 text.length += added; 230 // text[end + added .. $] = text[end .. $ - added]; 231 for (int i = cast(int) text.length - 1; i >= end + added; i--) 232 text[i] = text[i - added]; 233 } 234 else if (added < 0) 235 { 236 for (size_t i = start; i < text.length + added; i++) 237 text[i] = text[i - added]; 238 239 text = text[0 .. $ + added]; 240 } 241 text = text.assumeSafeAppend; 242 243 foreach (i, c; addition) 244 text[start + i] = cast(char) c; 245 } 246 247 /// Converts an LSP offset to a byte offset for using for example in array 248 /// slicing. 249 size_t offsetToBytes(size_t offset) const 250 { 251 return .countBytesUntilUTF16Index(text, offset); 252 } 253 254 /// Converts a byte offset to an LSP offset. 255 size_t bytesToOffset(size_t bytes) const 256 { 257 return .countUTF16Length(text[0 .. min($, bytes)]); 258 } 259 260 /// Converts a line/column position to an LSP offset. 
261 size_t positionToOffset(Position position) const 262 { 263 size_t offset = 0; 264 size_t bytes = 0; 265 while (bytes < text.length && position.line > 0) 266 { 267 const c = text.ptr[bytes]; 268 if (c == '\n') 269 position.line--; 270 utf16DecodeUtf8Length(c, offset, bytes); 271 } 272 273 while (bytes < text.length && position.character > 0) 274 { 275 const c = text.ptr[bytes]; 276 if (c == '\n') 277 break; 278 size_t utf16Size; 279 utf16DecodeUtf8Length(c, utf16Size, bytes); 280 if (utf16Size < position.character) 281 position.character -= utf16Size; 282 else 283 position.character = 0; 284 offset += utf16Size; 285 } 286 return offset; 287 } 288 289 /// Converts a line/column position to a byte offset. 290 size_t positionToBytes(Position position) const 291 { 292 size_t index = 0; 293 while (index < text.length && position.line > 0) 294 if (text.ptr[index++] == '\n') 295 position.line--; 296 297 while (index < text.length && position.character > 0) 298 { 299 const c = text.ptr[index]; 300 if (c == '\n') 301 break; 302 size_t utf16Size; 303 utf16DecodeUtf8Length(c, utf16Size, index); 304 if (utf16Size < position.character) 305 position.character -= utf16Size; 306 else 307 position.character = 0; 308 } 309 return index; 310 } 311 312 /// Converts an LSP offset to a line/column position. 313 Position offsetToPosition(size_t offset) const 314 { 315 size_t bytes; 316 size_t index; 317 size_t lastNl = -1; 318 319 Position ret; 320 while (bytes < text.length && index < offset) 321 { 322 const c = text.ptr[bytes]; 323 if (c == '\n') 324 { 325 ret.line++; 326 lastNl = index; 327 } 328 utf16DecodeUtf8Length(c, index, bytes); 329 } 330 const start = lastNl + 1; 331 ret.character = cast(uint)(index - start); 332 return ret; 333 } 334 335 /// Converts a byte offset to a line/column position. 336 Position bytesToPosition(size_t bytes) const 337 { 338 if (bytes > text.length) 339 bytes = text.length; 340 auto part = text.ptr[0 .. 
bytes].representation; 341 size_t lastNl = -1; 342 Position ret; 343 foreach (i; 0 .. bytes) 344 { 345 if (part.ptr[i] == '\n') 346 { 347 ret.line++; 348 lastNl = i; 349 } 350 } 351 ret.character = cast(uint)(cast(const(char)[]) part[lastNl + 1 .. $]).countUTF16Length; 352 return ret; 353 } 354 355 /// Converts a line/column byte offset to a line/column position. 356 Position lineColumnBytesToPosition(uint line, uint column) const 357 { 358 scope lineText = lineAtScope(line).chomp(); 359 uint offset = 0; 360 // keep over-extending positions 361 if (column > lineText.length) 362 { 363 offset = column - cast(uint)lineText.length; 364 column = cast(uint)lineText.length; 365 } 366 // utf16 length is always gonna be less than byte length, so adding offset will never overflow 367 return Position(line, cast(uint)lineText[0 .. column].countUTF16Length + offset); 368 } 369 370 /// Returns the position at "end" starting from the given "src" position which is assumed to be at byte "start" 371 /// Faster to quickly calculate nearby positions of known byte positions. 372 /// Falls back to $(LREF bytesToPosition) if end is before start. 373 Position movePositionBytes(Position src, size_t start, size_t end) const 374 { 375 if (end == start) 376 return src; 377 if (end < start) 378 return bytesToPosition(end); 379 380 auto t = text.ptr[min(text.length, start) .. 
min(text.length, end)]; 381 size_t bytes; 382 while (bytes < t.length) 383 { 384 const c = t.ptr[bytes]; 385 if (c == '\n') 386 { 387 src.line++; 388 src.character = 0; 389 bytes++; 390 } 391 else 392 utf16DecodeUtf8Length(c, src.character, bytes); 393 } 394 return src; 395 } 396 397 /// 398 unittest 399 { 400 import std.regex; 401 402 auto intRegex = regex(`\bint\b`); 403 404 Document d; 405 d.setContent("int foo(int x, uint y)\n{\n return cast(int)(x + y);\n}\n"); 406 407 // either use size_t.max or 0, both work as starting points for different reasons: 408 // - 0 always matches Position.init, so the offset can be calculated 409 // - size_t.max is larger than the checked index match, so position is recomputed 410 size_t lastIndex = size_t.max; 411 Position lastPosition; 412 413 Position[] matches; 414 415 foreach (match; d.rawText.matchAll(intRegex)) 416 { 417 size_t index = match.pre.length; 418 // to reduce boilerplate, use d.nextPositionBytes instead! 419 auto pos = d.movePositionBytes(lastPosition, lastIndex, index); 420 lastIndex = index; 421 lastPosition = pos; 422 matches ~= pos; 423 } 424 425 assert(matches == [ 426 Position(0, 0), 427 Position(0, 8), 428 Position(2, 16) 429 ]); 430 } 431 432 /// Calls $(LREF movePositionBytes), updates src to be the return value and 433 /// updates start to become end. This reduces boilerplate in common calling 434 /// scenarios. 
435 Position nextPositionBytes(ref Position src, ref size_t start, size_t end) const 436 { 437 auto pos = movePositionBytes(src, start, end); 438 src = pos; 439 start = end; 440 return pos; 441 } 442 443 /// 444 unittest 445 { 446 import std.regex; 447 448 auto intRegex = regex(`\bint\b`); 449 450 Document d; 451 d.setContent("int foo(int x, uint y)\n{\n return cast(int)(x + y);\n}\n"); 452 453 size_t lastIndex = size_t.max; 454 Position lastPosition; 455 456 Position[] matches; 457 foreach (match; d.rawText.matchAll(intRegex)) 458 matches ~= d.nextPositionBytes(lastPosition, lastIndex, match.pre.length); 459 460 assert(matches == [ 461 Position(0, 0), 462 Position(0, 8), 463 Position(2, 16) 464 ]); 465 } 466 467 /// Returns the word range at a given line/column position. 468 TextRange wordRangeAt(Position position) const 469 { 470 auto chars = wordInLine(lineAtScope(position), position.character); 471 return TextRange(Position(position.line, chars[0]), Position(position.line, chars[1])); 472 } 473 474 /// 475 unittest 476 { 477 Document d; 478 d.setContent(`void main() { writeln("hello world"); }`); 479 assert(d.wordRangeAt(Position(0, 0)) == TextRange(0, 0, 0, 4)); 480 } 481 482 /// Returns the word range at a given byte position. 483 size_t[2] wordRangeAt(size_t bytes) const 484 { 485 auto lineStart = text.lastIndexOf('\n', bytes) + 1; 486 auto ret = wordInLineBytes( 487 text.ptr[lineStart .. 
text.length], 488 cast(uint)(bytes - lineStart)); 489 ret[0] += lineStart; 490 ret[1] += lineStart; 491 return ret; 492 } 493 494 /// 495 unittest 496 { 497 Document d; 498 d.setContent(`void main() { writeln("hello world"); }`); 499 assert(d.wordRangeAt(0) == [0, 4]); 500 assert(d.wordRangeAt(3) == [0, 4]); 501 assert(d.wordRangeAt(4) == [0, 4]); 502 assert(d.wordRangeAt(5) == [5, 9]); 503 assert(d.wordRangeAt(9) == [5, 9]); 504 assert(d.wordRangeAt(10) == [10, 10]); 505 assert(d.wordRangeAt(14) == [14, 21]); 506 assert(d.wordRangeAt(20) == [14, 21]); 507 assert(d.wordRangeAt(21) == [14, 21]); 508 assert(d.wordRangeAt(23) == [23, 28]); 509 assert(d.wordRangeAt(27) == [23, 28]); 510 assert(d.wordRangeAt(28) == [23, 28]); 511 assert(d.wordRangeAt(29) == [29, 34]); 512 assert(d.wordRangeAt(30) == [29, 34]); 513 assert(d.wordRangeAt(34) == [29, 34]); 514 } 515 516 /// Returns a byte offset range as `[start, end]` of the given 0-based line 517 /// number. Contains the line terminator, if it exists. 518 size_t[2] lineByteRangeAt(uint line) const 519 { 520 size_t start = 0; 521 size_t index = 0; 522 while (line > 0 && index < text.length) 523 { 524 const c = text.ptr[index++]; 525 if (c == '\n') 526 { 527 line--; 528 start = index; 529 } 530 } 531 // if !found 532 if (line != 0) 533 return [0, 0]; 534 535 auto end = text.indexOf('\n', start); 536 if (end == -1) 537 end = text.length; 538 else 539 end++; 540 541 return [start, end]; 542 } 543 544 /// Returns the text of a line at the given position. 545 /// 546 /// Contains the line terminator, if it exists. 547 /// 548 /// The overload taking in a position just calls the overload taking a line 549 /// with the line being the position's line. 550 string lineAt(Position position) const 551 { 552 return lineAt(position.line); 553 } 554 555 /// ditto 556 string lineAt(Position position) immutable 557 { 558 return lineAt(position.line); 559 } 560 561 /// Returns the text of a line starting at line 0. 
562 /// 563 /// Contains the line terminator, if it exists. 564 string lineAt(uint line) const 565 { 566 return lineAtScope(line).idup; 567 } 568 569 /// ditto 570 string lineAt(uint line) immutable 571 { 572 return lineAtScope(line); 573 } 574 575 /// 576 unittest 577 { 578 Document d = Document("file:///home/projects/app.d"); 579 d.setContent("im"); 580 581 immutable d2 = cast(immutable)d.clone.setContent("import std.stdio;\nvoid main() {}"); 582 583 static assert(is(typeof(d.lineAtScope(0)) == const(char)[])); 584 static assert(is(typeof(d2.lineAtScope(0)) == string)); 585 static assert(is(typeof(d.lineAt(0)) == string)); 586 static assert(is(typeof(d2.lineAt(0)) == string)); 587 588 assert(d.lineAt(0) == "im"); 589 assert(d2.lineAt(0) == "import std.stdio;\n"); 590 591 assert(d.lineAtScope(0) == "im"); 592 assert(d2.lineAtScope(0) == "import std.stdio;\n"); 593 594 assert(d.lineAt(0).ptr !is d.rawText.ptr); 595 assert(d2.lineAt(0).ptr is d2.rawText.ptr); 596 } 597 598 /// Returns the line text at the given position. The memory content may be 599 /// modified by the $(LREF setContent) method by other code in the same 600 /// context or in a different context. 601 /// 602 /// The overload taking in a position just calls the overload taking a line 603 /// with the line being the position's line. 604 /// 605 /// Contains the line terminator, if it exists. 606 /// 607 /// See_Also: $(LREF lineAt) to get the same content, but with duplicated 608 /// memory, so it can be stored for later use. 609 scope auto lineAtScope(Position position) const inout 610 { 611 return lineAtScope(position.line); 612 } 613 614 /// ditto 615 scope auto lineAtScope(uint line) const inout 616 { 617 auto range = lineByteRangeAt(line); 618 return text[range[0] .. 
range[1]]; 619 } 620 621 /// 622 unittest 623 { 624 void assertEqual(A, B)(A a, B b) 625 { 626 import std.conv : to; 627 628 assert(a == b, a.to!string ~ " is not equal to " ~ b.to!string); 629 } 630 631 Document doc; 632 doc.setContent(`abc 633 hellö world 634 how åre 635 you?`); 636 assertEqual(doc.lineAt(Position(0, 0)), "abc\n"); 637 assertEqual(doc.lineAt(Position(0, 100)), "abc\n"); 638 assertEqual(doc.lineAt(Position(1, 3)), "hellö world\n"); 639 assertEqual(doc.lineAt(Position(2, 0)), "how åre\n"); 640 assertEqual(doc.lineAt(Position(3, 0)), "you?"); 641 assertEqual(doc.lineAt(Position(3, 8)), "you?"); 642 assertEqual(doc.lineAt(Position(4, 0)), ""); 643 } 644 645 /// Returns how a line is terminated at the given 0-based line number. 646 /// Defaults to LF for the last line / no line terminator. 647 EolType eolAt(int line) const 648 { 649 size_t index = 0; 650 int curLine = 0; 651 bool prevWasCr = false; 652 while (index < text.length) 653 { 654 auto c = decode!(UseReplacementDchar.yes)(text, index); 655 if (c == '\n') 656 { 657 if (curLine == line) 658 { 659 return prevWasCr ? EolType.crlf : EolType.lf; 660 } 661 curLine++; 662 } 663 prevWasCr = c == '\r'; 664 } 665 return EolType.lf; 666 } 667 668 /// 669 unittest 670 { 671 auto d = Document("file:///home/projects/app.d"); 672 d.setContent("import std.stdio;\nvoid main() {\r\n\twriteln(`hello world`);\r}"); 673 // \r is not supported as line terminator 674 assert(d.lineAt(2) == "\twriteln(`hello world`);\r}"); 675 676 assert(d.eolAt(0) == EolType.lf); 677 assert(d.eolAt(1) == EolType.crlf); 678 assert(d.eolAt(2) == EolType.lf); 679 } 680 } 681 682 /// Helper struct which should have one unique instance in the application which 683 /// processes document events sent by a LSP client to an LSP server and creates 684 /// an in-memory representation of all the files managed by the client. 685 /// 686 /// This data structure is not thread safe. 
struct TextDocumentManager
{
	/// Internal document storage. Only iterate over this using `foreach`, other
	/// operations are not considered officially supported.
	Document[] documentStore;

	/// Same as $(LREF tryGet) but throws an exception if the URI doesn't exist.
	///
	/// Returns: a reference into $(LREF documentStore).
	ref Document opIndex(string uri)
	{
		const found = documentStore.countUntil!(a => a.uri == uri);
		if (found >= 0)
			return documentStore[found];
		throw new Exception("Document '" ~ uri ~ "' not found");
	}

	/// Tries to get a document from a URI, returns Document.init if it is not
	/// in the in-memory cache / not sent by the client.
	Document tryGet(string uri)
	{
		const found = documentStore.countUntil!(a => a.uri == uri);
		return found == -1 ? Document.init : documentStore[found];
	}

	deprecated ref Document loadFromFilesystem()(string uri)
	{
		static assert(false, "use getOrFromFilesystem instead (LSP open takes priority over filesystem)");
	}

	/// Returns the managed document for the given URI or if it doesn't exist
	/// it tries to read the file from the filesystem and open it from that.
	///
	/// Note that a LSP close method will unload this early.
	///
	/// Params:
	///     uri = the document URI to try to load. Must be consistent with LSP
	///           URIs. (e.g. normalized URIs)
	///     inserted = if specified, gets set to true if the file was read from
	///                filesystem and false if it was already present.
	///
	/// Returns: the created document
	///
	/// Throws: FileException in case the file doesn't exist or other file
	///         system errors. In this case no new document should have been
	///         inserted yet.
733 ref Document getOrFromFilesystem(string uri, out bool inserted) 734 { 735 import served.lsp.uri : uriToFile; 736 import fs = std.file; 737 738 auto idx = documentStore.countUntil!(a => a.uri == uri); 739 if (idx != -1) 740 { 741 inserted = false; 742 return documentStore[idx]; 743 } 744 745 string path = uriToFile(uri); 746 auto content = fs.readText(path); 747 748 auto index = documentStore.length++; 749 documentStore[index].uri = uri; 750 documentStore[index].version_ = -1; 751 documentStore[index].setContent(content); 752 inserted = true; 753 return documentStore[index]; 754 } 755 756 /// 757 unittest 758 { 759 import served.lsp.uri; 760 761 import std.file; 762 import std.path; 763 764 auto dir = buildPath(tempDir(), "textdocumentmanager"); 765 mkdir(dir); 766 scope (exit) 767 rmdirRecurse(dir); 768 769 auto app_d = buildPath(dir, "app.d"); 770 auto src = "import std.stdio; void main() { writeln(`hello world`); }"; 771 write(app_d, src); 772 773 TextDocumentManager documents; 774 bool created; 775 auto doc = &documents.getOrFromFilesystem(uriFromFile(app_d), created); 776 assert(created); 777 auto other = &documents.getOrFromFilesystem(uriFromFile(app_d)); 778 assert(doc is other); 779 780 assert(doc.rawText == src); 781 assert(doc.rawText !is src); 782 } 783 784 /// ditto 785 ref Document getOrFromFilesystem(string uri) 786 { 787 bool b; 788 return getOrFromFilesystem(uri, b); 789 } 790 791 /// Unloads the given URI so it's no longer accessible. Note that this 792 /// should only be done for documents loaded manually and never for LSP 793 /// documents as it will break all features in that file until reopened. 
794 bool unloadDocument(string uri) 795 { 796 auto idx = documentStore.countUntil!(a => a.uri == uri); 797 if (idx == -1) 798 return false; 799 800 documentStore[idx] = documentStore[$ - 1]; 801 documentStore.length--; 802 documentStore = documentStore.assumeSafeAppend; 803 return true; 804 } 805 806 /// Returns the currently preferred syncKind to use with the client. 807 /// Additionally always supports the `full` sync kind. 808 static TextDocumentSyncKind syncKind() 809 { 810 return TextDocumentSyncKind.incremental; 811 } 812 813 /// 814 unittest 815 { 816 assert(TextDocumentManager.syncKind == TextDocumentSyncKind.incremental); 817 } 818 819 /// Inserts a document manually or updates an existing one, acting like 820 /// textDocument/didOpen if it didn't exist or fully replacing the document 821 /// if it did exist. 822 ref Document insertOrUpdate(Document d) 823 { 824 auto idx = documentStore.countUntil!(a => a.uri == d.uri); 825 if (idx != -1) 826 { 827 return documentStore[idx] = d; 828 } 829 else 830 { 831 auto index = documentStore.length++; 832 return documentStore[index] = d; 833 } 834 } 835 836 /// Processes an LSP packet and performs the document update in-memory that 837 /// is requested. 838 /// Params: 839 /// msg = The request sent by a client. This method only processes 840 /// `textDocument/` messages which are relevant to file modification. 841 /// Returns: `true` if the given method was handled, `false` otherwise. 
842 bool process(RequestMessageRaw msg) 843 { 844 if (msg.method == "textDocument/didOpen") 845 { 846 auto params = msg.paramsJson.deserializeJson!DidOpenTextDocumentParams; 847 // there may be at most one didOpen request, but library code can 848 // load files from the filesystem 849 insertOrUpdate(Document(params.textDocument)); 850 return true; 851 } 852 else if (msg.method == "textDocument/didClose") 853 { 854 auto params = msg.paramsJson.deserializeJson!DidCloseTextDocumentParams; 855 auto targetUri = params.textDocument.uri; 856 if (!unloadDocument(targetUri)) 857 { 858 warning("Received didClose notification for URI not in system: ", targetUri); 859 warning("This can be a potential memory leak if it was previously opened under a different name."); 860 } 861 return true; 862 } 863 else if (msg.method == "textDocument/didChange") 864 { 865 auto params = msg.paramsJson.deserializeJson!DidChangeTextDocumentParams; 866 auto targetUri = params.textDocument.uri; 867 auto idx = documentStore.countUntil!(a => a.uri == targetUri); 868 if (idx >= 0) 869 { 870 documentStore[idx].version_ = params.textDocument.version_; 871 foreach (change; params.contentChanges) 872 { 873 if (!change.range.isNone) 874 documentStore[idx].applyChange(change.range.deref, change.text); 875 else 876 documentStore[idx].setContent(change.text); 877 } 878 } 879 return true; 880 } 881 return false; 882 } 883 } 884 885 /// 886 unittest 887 { 888 import std.exception; 889 890 TextDocumentManager documents; 891 // most common usage, forward LSP events to this helper struct. 
892 RequestMessageRaw incomingPacket = { 893 // dummy data 894 method: "textDocument/didOpen", 895 paramsJson: `{ 896 "textDocument": { 897 "uri": "file:///home/projects/app.d", 898 "languageId": "d", 899 "version": 123, 900 "text": "import std.stdio;\n\nvoid main()\n{\n\twriteln(\"hello world\");\n}\n" 901 } 902 }` 903 }; 904 documents.process(incomingPacket); 905 // documents.process returns false if it's not a method meant for text 906 // document management. serve-d:serverbase abstracts this away automatically. 907 908 // normally used from LSP methods where you have params like this 909 TextDocumentPositionParams params = { 910 textDocument: TextDocumentIdentifier("file:///home/projects/app.d"), 911 position: Position(4, 2) 912 }; 913 914 // if it's sent by the LSP, the document being loaded should be almost guaranteed. 915 auto doc = documents[params.textDocument.uri]; 916 // trying to index files that haven't been sent by the client will throw an Exception 917 assertThrown(documents["file:///path/to/non-registered.d"]); 918 919 // you can use tryGet to see if a Document has been opened yet and use it if so. 920 assert(documents.tryGet("file:///path/to/non-registered.d") is Document.init); 921 assert(documents.tryGet(params.textDocument.uri) !is Document.init); 922 923 // Document defines a variety of utility functions that have been optimized 924 // for speed and convenience. 
925 assert(doc.lineAtScope(params.position) == "\twriteln(\"hello world\");\n"); 926 927 auto range = doc.wordRangeAt(params.position); 928 assert(doc.positionToBytes(range.start) == 34); 929 assert(doc.positionToBytes(range.end) == 41); 930 931 // when yielding (Fiber context switch) documents may be modified or deleted though: 932 933 RequestMessageRaw incomingPacket2 = { 934 // dummy data 935 method: "textDocument/didChange", 936 paramsJson: `{ 937 "textDocument": { 938 "uri": "file:///home/projects/app.d", 939 "version": 124 940 }, 941 "contentChanges": [ 942 { 943 "range": { 944 "start": { "line": 4, "character": 6 }, 945 "end": { "line": 4, "character": 8 } 946 }, 947 "text": "" 948 } 949 ] 950 }` 951 }; 952 documents.process(incomingPacket2); 953 954 assert(doc.lineAtScope(params.position) == "\twrite(\"hello world\");\n"); 955 956 RequestMessageRaw incomingPacket3 = { 957 // dummy data 958 method: "textDocument/didChange", 959 paramsJson: `{ 960 "textDocument": { 961 "uri": "file:///home/projects/app.d", 962 "version": 125 963 }, 964 "contentChanges": [ 965 { 966 "text": "replace everything" 967 } 968 ] 969 }` 970 }; 971 documents.process(incomingPacket3); 972 973 // doc.rawText is now half overwritten, you need to refetch a document when yielding or updating: 974 assert(doc.rawText != "replace everything"); 975 doc = documents[params.textDocument.uri]; 976 assert(doc.rawText == "replace everything"); 977 978 RequestMessageRaw incomingPacket4 = { 979 // dummy data 980 method: "textDocument/didClose", 981 paramsJson: `{ 982 "textDocument": { 983 "uri": "file:///home/projects/app.d" 984 } 985 }` 986 }; 987 documents.process(incomingPacket4); 988 989 assertThrown(documents[params.textDocument.uri]); 990 // so make sure that you don't keep references to documents when leaving scope or switching context. 991 } 992 993 /// Helper structure for storing any data of type T on a per-file basis. 
struct PerDocumentCache(T)
{
	/// A cached value together with the document it was stored for.
	struct Entry
	{
		Document document;
		T data;
	}

	/// All cache entries. `store` keeps at most one entry per document URI.
	Entry[] entries;

	/// Returns the data stored for `uri` if it was stored for a document
	/// version which is at least as new as the version currently held by
	/// `source`. Returns `T.init` if nothing is stored or if it is outdated.
	T cached(ref TextDocumentManager source, string uri)
	{
		auto newest = source.tryGet(uri);
		foreach (ref entry; entries)
			if (entry.document.uri == uri)
				return entry.document.version_ >= newest.version_ ? entry.data : T.init;
		return T.init;
	}

	/// Stores `data` for the given document. An existing entry for the same
	/// URI is only replaced if `document` is at least as new as the stored
	/// document, otherwise the call is ignored.
	void store(Document document, T data)
	{
		foreach (ref entry; entries)
		{
			if (entry.document.uri == document.uri)
			{
				if (document.version_ >= entry.document.version_)
				{
					entry.document = document;
					entry.data = data;
				}
				return;
			}
		}
		entries ~= Entry(document, data);
	}
}

/// Returns a range of the identifier/word at the given position.
///
/// `wordInLine` takes and returns UTF-16 code unit columns (as used by LSP),
/// `wordInLineBytes` takes and returns byte offsets.
uint[2] wordInLine(const(char)[] line, uint character)
{
	return wordInLineImpl!(wchar, uint)(line, character);
}

/// ditto
size_t[2] wordInLineBytes(const(char)[] line, size_t bytes)
{
	return wordInLineImpl!(char, size_t)(line, bytes);
}

/// Shared implementation of $(LREF wordInLine) and $(LREF wordInLineBytes).
///
/// Params:
///     CharT = code unit type in which offsets are counted (`wchar` counts
///             UTF-16 code units, `char` counts bytes).
///     SizeT = integer type of the offsets.
///     line = the text to search in, scanning stops at the first separator
///            after the word.
///     character = the offset to find the surrounding word for.
///
/// Returns: `[start, end]` of the word, both clamped to `line.length`. A word
/// that runs up to the end of `line` ends at the end of `line`.
SizeT[2] wordInLineImpl(CharT, SizeT)(const(char)[] line, SizeT character)
out(r; r[1] >= r[0])
{
	size_t index = 0;
	SizeT offs = 0;

	SizeT lastStart = 0;
	SizeT start = character, end = character;
	bool searchStart = true;
	bool hitSeparator = false;

	while (index < line.length)
	{
		const c = decode(line, index);
		const l = cast(SizeT) c.codeLength!CharT;

		if (searchStart)
		{
			if (isDIdentifierSeparatingChar(c))
			{
				if (character == 0)
					break;
				lastStart = offs + l;
			}

			if (offs + l >= character)
			{
				start = lastStart;
				searchStart = false;
			}

			offs += l;
		}
		else
		{
			end = offs;
			offs += l;
			if (isDIdentifierSeparatingChar(c))
			{
				hitSeparator = true;
				break;
			}
		}
	}

	// the word was started but the line ended before any separator: the word
	// extends to the end of the line (otherwise its last character is cut off)
	if (!searchStart && !hitSeparator)
		end = offs;

	if (start > line.length)
		start = cast(SizeT)line.length;
	if (end > line.length)
		end = cast(SizeT)line.length;

	return [start, end];
}

unittest
{
	string a = "int i;";
	string b = "a (int i;";
	string c = "{int i;";
	string d = "{ int i;";
	assert(a.wordInLineBytes(0) == [0, 3]);
	assert(a.wordInLineBytes(1) == [0, 3]);
	assert(a.wordInLineBytes(2) == [0, 3]);
	assert(a.wordInLineBytes(3) == [0, 3]);
	assert(a.wordInLineBytes(4) == [4, 5]);
	assert(a.wordInLineBytes(5) == [4, 5]);
	assert(a.wordInLineBytes(6) == [6, 6]);
	assert(a.wordInLineBytes(7) == [6, 6]);
	assert(a.wordInLineBytes(size_t.max) == [6, 6]);

	assert(b.wordInLineBytes(0) == [0, 1]);
	assert(b.wordInLineBytes(1) == [0, 1]);
	assert(b.wordInLineBytes(2) == [2, 2]);
	assert(b.wordInLineBytes(3) == [3, 6]);
	assert(b.wordInLineBytes(4) == [3, 6]);
	assert(b.wordInLineBytes(5) == [3, 6]);
	assert(b.wordInLineBytes(6) == [3, 6]);
	assert(b.wordInLineBytes(7) == [7, 8]);
	assert(b.wordInLineBytes(8) == [7, 8]);
	assert(b.wordInLineBytes(9) == [9, 9]);
	assert(b.wordInLineBytes(10) == [9, 9]);
	assert(b.wordInLineBytes(100) == [9, 9]);
	assert(b.wordInLineBytes(size_t.max) == [9, 9]);

	assert(c.wordInLineBytes(0) == [0, 0]);
	assert(c.wordInLineBytes(1) == [1, 4]);
	assert(c.wordInLineBytes(2) == [1, 4]);
	assert(c.wordInLineBytes(3) == [1, 4]);
	assert(c.wordInLineBytes(4) == [1, 4]);
	assert(c.wordInLineBytes(5) == [5, 6]);
	assert(c.wordInLineBytes(6) == [5, 6]);
	assert(c.wordInLineBytes(7) == [7, 7]);
	assert(c.wordInLineBytes(8) == [7, 7]);
	assert(c.wordInLineBytes(size_t.max) == [7, 7]);

	assert(d.wordInLineBytes(0) == [0, 0]);
	assert(d.wordInLineBytes(1) == [1, 1]);
	assert(d.wordInLineBytes(2) == [2, 5]);
	assert(d.wordInLineBytes(3) == [2, 5]);
	assert(d.wordInLineBytes(4) == [2, 5]);
	assert(d.wordInLineBytes(5) == [2, 5]);
	assert(d.wordInLineBytes(6) == [6, 7]);
	assert(d.wordInLineBytes(7) == [6, 7]);
	assert(d.wordInLineBytes(8) == [8, 8]);
	assert(d.wordInLineBytes(9) == [8, 8]);
	assert(d.wordInLineBytes(size_t.max) == [8, 8]);

	// words which are not followed by a separator end at the end of the line
	assert("foo".wordInLineBytes(0) == [0, 3]);
	assert("foo".wordInLineBytes(1) == [0, 3]);
	assert("foo".wordInLineBytes(3) == [0, 3]);
	assert("a^b".wordInLineBytes(0) == [0, 1]);
}

deprecated("use isDIdentifierSeparatingChar instead")
alias isIdentifierSeparatingChar = isDIdentifierSeparatingChar;

/// Returns true if `c` cannot be part of a D identifier and thus ends a word.
///
/// These are all ASCII characters other than letters, digits and `_` as well
/// as the unicode line separators U+2028 and U+2029. All other non-ASCII
/// characters are treated as identifier characters.
bool isDIdentifierSeparatingChar(dchar c)
{
	return c < '0' // control characters, whitespace and punctuation before the digits
		|| (c > '9' && c < 'A') // : ; < = > ? @
		|| (c > 'Z' && c < 'a' && c != '_') // [ \ ] ^ `
		|| (c > 'z' && c < 128) // { | } ~ DEL
		|| c == '\u2028' || c == '\u2029'; // line separators
}

/// Returns true if `s` is not empty, does not start with an ASCII digit and
/// contains no character for which $(LREF isDIdentifierSeparatingChar) is true.
bool isValidDIdentifier(const(char)[] s)
{
	import std.ascii : isDigit;

	return s.length && !s[0].isDigit && !s.any!isDIdentifierSeparatingChar;
}

unittest
{
	assert(!isValidDIdentifier(""));
	assert(!isValidDIdentifier("0"));
	assert(!isValidDIdentifier("10"));
	assert(!isValidDIdentifier("1a"));
	assert(isValidDIdentifier("_"));
	assert(isValidDIdentifier("a"));
	assert(isValidDIdentifier("__helloWorld123"));
	assert(!isValidDIdentifier("a^b"));
	assert(!isValidDIdentifier("a[0]"));
}

unittest
{
	Document doc;
	doc.text.reserve(16);
	auto ptr = doc.text.ptr;
	assert(doc.rawText.length == 0);
	doc.setContent("Hello world");
	assert(doc.rawText == "Hello world");
	doc.setContent("foo");
	assert(doc.rawText == "foo");
	doc.setContent("foo bar baz baf");
	assert(doc.rawText == "foo bar baz baf");
	doc.applyChange(TextRange(0, 4, 0, 8), "");
	assert(doc.rawText == "foo baz baf");
	doc.applyChange(TextRange(0, 4, 0, 8), "bad");
	assert(doc.rawText == "foo badbaf");
	doc.applyChange(TextRange(0, 4, 0, 8), "bath");
	assert(doc.rawText == "foo bathaf");
	doc.applyChange(TextRange(0, 4, 0, 10), "bath");
	assert(doc.rawText == "foo bath");
	doc.applyChange(TextRange(0, 0, 0, 8), "bath");
	assert(doc.rawText == "bath");
	doc.applyChange(TextRange(0, 0, 0, 1), "par");
	assert(doc.rawText == "parath", doc.rawText);
	doc.applyChange(TextRange(0, 0, 0, 4), "");
	assert(doc.rawText == "th");
	doc.applyChange(TextRange(0, 2, 0, 2), "e");
	assert(doc.rawText == "the");
	doc.applyChange(TextRange(0, 0, 0, 0), "in");
	assert(doc.rawText == "inthe");
	assert(ptr is doc.text.ptr);
}

// Advances both indices over one UTF-8 encoded code point, given only its
// first byte `c`: utf8Index by the sequence length announced by the lead byte
// and utf16Index by the number of UTF-16 code units of that code point (2 for
// 4-byte sequences, 1 otherwise). Any other byte, including a continuation
// byte, counts as a single unit in both indices.
pragma(inline, true) private void utf16DecodeUtf8Length(A, B)(char c, ref A utf16Index,
		ref B utf8Index) @safe nothrow @nogc
{
	switch (c & 0b1111_0000)
	{
	case 0b1110_0000:
		// assume valid encoding (no wrong surrogates)
		utf16Index++;
		utf8Index += 3;
		break;
	case 0b1111_0000:
		utf16Index += 2;
		utf8Index += 4;
		break;
	case 0b1100_0000:
	case 0b1101_0000:
		utf16Index++;
		utf8Index += 2;
		break;
	default:
		utf16Index++;
		utf8Index++;
		break;
	}
}

// Counts the number of UTF-16 code units needed to represent the given UTF-8
// text, without decoding it.
pragma(inline, true) size_t countUTF16Length(scope const(char)[] text) @safe nothrow @nogc
{
	size_t offset;
	size_t index;
	while (index < text.length)
	{
		const c = (() @trusted => text.ptr[index++])();
		// every byte which is not a continuation byte (0x80 .. 0xBF) starts a code point
		if (cast(byte)c >= -0x40) offset++;
		// lead bytes of 4-byte sequences need a second UTF-16 code unit
		if (c >= 0xf0) offset++;
	}
	return offset;
}

// Returns the byte index in the given UTF-8 text at which `utf16Offset` UTF-16
// code units have been passed, or the text length if the text is shorter. The
// returned index is never on a continuation byte.
pragma(inline, true) size_t countBytesUntilUTF16Index(scope const(char)[] text, size_t utf16Offset) @safe nothrow @nogc
{
	size_t bytes;
	size_t offset;
	while (offset < utf16Offset && bytes < text.length)
	{
		char c = (() @trusted => text.ptr[bytes++])();
		if (cast(byte)c >= -0x40) offset++;
		if (c >= 0xf0) offset++;
	}
	// skip the remaining continuation bytes of the last counted code point
	while (bytes < text.length)
	{
		char c = (() @trusted => text.ptr[bytes])();
		if (cast(byte)c >= -0x40) break;
		bytes++;
	}
	return bytes;
}

version (unittest)
{
	import core.time;

	Document testUnicodeDocument = 
Document.nullDocumentOwnMemory(cast(char[]) `/// 1276 /// Copyright © 2020 Somebody (not actually™) x3 1277 /// 1278 module some.file; 1279 1280 enum Food : int 1281 { 1282 pizza = '\U0001F355', // 🍕 1283 burger = '\U0001F354', // 🍔 1284 chicken = '\U0001F357', // 🍗 1285 taco = '\U0001F32E', // 🌮 1286 wrap = '\U0001F32F', // 🌯 1287 salad = '\U0001F957', // 🥗 1288 pasta = '\U0001F35D', // 🍝 1289 sushi = '\U0001F363', // 🍣 1290 oden = '\U0001F362', // 🍢 1291 egg = '\U0001F373', // 🍳 1292 croissant = '\U0001F950', // 🥐 1293 baguette = '\U0001F956', // 🥖 1294 popcorn = '\U0001F37F', // 🍿 1295 coffee = '\u2615', // ☕ 1296 cookie = '\U0001F36A', // 🍪 1297 } 1298 1299 void main() { 1300 // taken from https://github.com/DlangRen/Programming-in-D/blob/master/ddili/src/ders/d.cn/aa.d 1301 int[string] colorCodes = [ /* ... */ ]; 1302 1303 if ("purple" in colorCodes) { 1304 // ü®™🍳键 “purple” 在表中 1305 1306 } else { // line 31 1307 //表中不存在 键 “purple” 1308 } 1309 1310 string x; 1311 }`); 1312 1313 enum testSOF_byte = 0; 1314 enum testSOF_offset = 0; 1315 enum testSOF_position = Position(0, 0); 1316 1317 enum testEOF_byte = 872; 1318 enum testEOF_offset = 805; 1319 enum testEOF_position = Position(36, 1); 1320 1321 // in line before unicode 1322 enum testLinePreUni_byte = 757; 1323 enum testLinePreUni_offset = 724; 1324 enum testLinePreUni_position = Position(29, 4); // after `//` 1325 1326 // in line after unicode 1327 enum testLinePostUni_byte = 789; 1328 enum testLinePostUni_offset = 742; 1329 enum testLinePostUni_position = Position(29, 22); // after `purple” 在` 1330 1331 // ascii line after unicode line 1332 enum testMidAsciiLine_byte = 804; 1333 enum testMidAsciiLine_offset = 753; 1334 enum testMidAsciiLine_position = Position(31, 7); 1335 1336 // after unicode, end of line 1337 enum testEOLPostUni_byte = 795; 1338 enum testEOLPostUni_offset = 744; 1339 enum testEOLPostUni_position = Position(29, 24); // after `purple” 在表中` 1340 1341 @("{offset, bytes, position} -> {offset, 
bytes, position}") 1342 unittest 1343 { 1344 import std.conv; 1345 import std.stdio; 1346 1347 static foreach (test; [ 1348 "SOF", "EOF", "LinePreUni", "LinePostUni", "MidAsciiLine", "EOLPostUni" 1349 ]) 1350 { 1351 { 1352 enum testOffset = mixin("test" ~ test ~ "_offset"); 1353 enum testByte = mixin("test" ~ test ~ "_byte"); 1354 enum testPosition = mixin("test" ~ test ~ "_position"); 1355 1356 writeln(" === Test ", test, " ==="); 1357 1358 writeln(testByte, " byte -> offset ", testOffset); 1359 assert(testUnicodeDocument.bytesToOffset(testByte) == testOffset, 1360 "fail " ~ test ~ " byte->offset = " ~ testUnicodeDocument.bytesToOffset(testByte) 1361 .to!string); 1362 writeln(testByte, " byte -> position ", testPosition); 1363 assert(testUnicodeDocument.bytesToPosition(testByte) == testPosition, 1364 "fail " ~ test ~ " byte->position = " ~ testUnicodeDocument.bytesToPosition(testByte) 1365 .to!string); 1366 1367 writeln(testOffset, " offset -> byte ", testByte); 1368 assert(testUnicodeDocument.offsetToBytes(testOffset) == testByte, 1369 "fail " ~ test ~ " offset->byte = " ~ testUnicodeDocument.offsetToBytes(testOffset) 1370 .to!string); 1371 writeln(testOffset, " offset -> position ", testPosition); 1372 assert(testUnicodeDocument.offsetToPosition(testOffset) == testPosition, 1373 "fail " ~ test ~ " offset->position = " ~ testUnicodeDocument.offsetToPosition(testOffset) 1374 .to!string); 1375 1376 writeln(testPosition, " position -> offset ", testOffset); 1377 assert(testUnicodeDocument.positionToOffset(testPosition) == testOffset, 1378 "fail " ~ test ~ " position->offset = " ~ testUnicodeDocument.positionToOffset(testPosition) 1379 .to!string); 1380 writeln(testPosition, " position -> byte ", testByte); 1381 assert(testUnicodeDocument.positionToBytes(testPosition) == testByte, 1382 "fail " ~ test ~ " position->byte = " ~ testUnicodeDocument.positionToBytes(testPosition) 1383 .to!string); 1384 1385 writeln(); 1386 } 1387 } 1388 1389 const size_t maxBytes = 
testEOF_byte; 1390 const size_t maxOffset = testEOF_offset; 1391 const Position maxPosition = testEOF_position; 1392 1393 writeln("max offset -> byte"); 1394 assert(testUnicodeDocument.offsetToBytes(size_t.max) == maxBytes); 1395 writeln("max offset -> position"); 1396 assert(testUnicodeDocument.offsetToPosition(size_t.max) == maxPosition); 1397 writeln("max byte -> offset"); 1398 assert(testUnicodeDocument.bytesToOffset(size_t.max) == maxOffset); 1399 writeln("max byte -> position"); 1400 assert(testUnicodeDocument.bytesToPosition(size_t.max) == maxPosition); 1401 writeln("max position -> offset"); 1402 assert(testUnicodeDocument.positionToOffset(Position(uint.max, uint.max)) == maxOffset); 1403 writeln("max position -> byte"); 1404 assert(testUnicodeDocument.positionToBytes(Position(uint.max, uint.max)) == maxBytes); 1405 } 1406 1407 unittest 1408 { 1409 // in line after unicode 1410 foreach (col; cast(uint[])[256, 300, int.max, uint.max]) 1411 { 1412 assert(testUnicodeDocument.positionToBytes(Position(29, col)) == testEOLPostUni_byte); 1413 assert(testUnicodeDocument.positionToOffset(Position(29, col)) == testEOLPostUni_offset); 1414 } 1415 1416 assert(testUnicodeDocument.lineColumnBytesToPosition(29, 42) == Position(29, 24)); 1417 assert(testUnicodeDocument.lineColumnBytesToPosition(29, 43) == Position(29, 25)); 1418 assert(testUnicodeDocument.lineColumnBytesToPosition(29, 4_000_000_042) == Position(29, 4_000_000_024)); 1419 assert(testUnicodeDocument.lineColumnBytesToPosition(29, uint.max) == Position(29, 4_294_967_277)); 1420 } 1421 1422 version (none) 1423 @("character transform benchmarks") 1424 unittest 1425 { 1426 import std.datetime.stopwatch; 1427 import std.random; 1428 import std.stdio; 1429 1430 enum PositionCount = 32; 1431 size_t[PositionCount] testBytes; 1432 size_t[PositionCount] testOffsets; 1433 Position[PositionCount] testPositions; 1434 1435 static immutable funs = [ 1436 "offsetToBytes", "offsetToPosition", "bytesToOffset", 
"bytesToPosition", 1437 "positionToOffset", "positionToBytes" 1438 ]; 1439 1440 size_t debugSum; 1441 1442 size_t lengthUtf16 = testUnicodeDocument.text.codeLength!wchar; 1443 enum TestRepeats = 10; 1444 Duration[TestRepeats][funs.length] times; 1445 1446 StopWatch sw; 1447 static foreach (iterations; [ 1448 1e3, 1e4, /* 1e5 */ 1449 ]) 1450 { 1451 writeln("=================="); 1452 writeln("Timing ", iterations, "x", PositionCount, "x", TestRepeats, " iterations:"); 1453 foreach (ref row; times) 1454 foreach (ref col; row) 1455 col = Duration.zero; 1456 1457 static foreach (t; 0 .. TestRepeats) 1458 { 1459 foreach (i, ref v; testOffsets) 1460 { 1461 v = uniform(0, lengthUtf16); 1462 testBytes[i] = testUnicodeDocument.offsetToBytes(v); 1463 testPositions[i] = testUnicodeDocument.offsetToPosition(v); 1464 } 1465 static foreach (fi, fun; funs) 1466 { 1467 sw.reset(); 1468 sw.start(); 1469 foreach (i; 0 .. iterations) 1470 { 1471 foreach (v; 0 .. PositionCount) 1472 { 1473 static if (fun[0] == 'b') 1474 mixin("debugSum |= testUnicodeDocument." ~ fun ~ "(testBytes[v]).sumVal;"); 1475 else static if (fun[0] == 'o') 1476 mixin("debugSum |= testUnicodeDocument." ~ fun ~ "(testOffsets[v]).sumVal;"); 1477 else static if (fun[0] == 'p') 1478 mixin("debugSum |= testUnicodeDocument." 
~ fun ~ "(testPositions[v]).sumVal;"); 1479 else 1480 static assert(false); 1481 } 1482 } 1483 sw.stop(); 1484 times[fi][t] = sw.peek; 1485 } 1486 } 1487 static foreach (fi, fun; funs) 1488 { 1489 writeln(fun, ": ", formatDurationDistribution(times[fi])); 1490 } 1491 writeln(); 1492 writeln(); 1493 } 1494 1495 writeln("tricking the optimizer", debugSum); 1496 } 1497 1498 private pragma(inline, true) size_t sumVal(size_t v) pure @safe nothrow @nogc 1499 { 1500 return v; 1501 } 1502 1503 private pragma(inline, true) size_t sumVal(Position v) pure @trusted nothrow @nogc 1504 { 1505 return cast(size_t)*(cast(ulong*)&v); 1506 } 1507 1508 private string formatDurationDistribution(size_t n)(Duration[n] durs) 1509 { 1510 import std.algorithm : fold, map, sort, sum; 1511 import std.format : format; 1512 import std.math : sqrt; 1513 1514 Duration total = durs[].fold!"a+b"; 1515 sort!"a<b"(durs[]); 1516 double msAvg = cast(double) total.total!"hnsecs" / 10_000.0 / n; 1517 double msMedian = cast(double) durs[$ / 2].total!"hnsecs" / 10_000.0; 1518 double[n] diffs = 0; 1519 foreach (i, dur; durs) 1520 diffs[i] = (cast(double) dur.total!"hnsecs" / 10_000.0) - msAvg; 1521 double msStdDeviation = diffs[].map!"a*a".sum.sqrt; 1522 return format!"[avg=%.4fms, median=%.4f, sd=%.4f]"(msAvg, msMedian, msStdDeviation); 1523 } 1524 }