module served.lsp.textdocumentmanager;

import std.algorithm;
import std.experimental.logger;
import std.json;
import std.string;
import std.utf : codeLength, decode, UseReplacementDchar;

import served.lsp.jsonrpc;
import served.lsp.protocol;

import painlessjson;

/// in-memory representation of a file at any given URI. Not thread-safe.
///
/// The text is stored as UTF-8. LSP "offsets" and `Position.character` values
/// are counted in UTF-16 code units, "bytes" are indices into the UTF-8 buffer.
struct Document
{
	/// The URI of this document. Should not be changed.
	DocumentUri uri;
	/// The language ID as reported by the client. Should not be changed.
	string languageId;
	/// The document version as reported by the client. Should not be changed.
	long version_;
	private char[] text;

	/// Returns the stored language ID or, if none is stored, a language ID
	/// guessed from the file extension of the URI (compared ignoring case).
	/// Returns: `null` if no language ID is stored and the extension is not
	/// one of the known ones.
	string getLanguageId() const @property @trusted @nogc nothrow
	{
		if (!languageId.length)
		{
			import std.path : extension;
			import std.uni : sicmp;

			const ext = uri.extension;
			if (ext.sicmp(".d") == 0)
				return "d";
			else if (ext.sicmp(".dpp") == 0)
				return "dpp";
			else if (ext.sicmp(".ds") == 0 || ext.sicmp(".dscript") == 0)
				return "dscript";
			else if (ext.sicmp(".dml") == 0)
				return "dml";
			else if (ext.sicmp(".sdl") == 0)
				return "sdl";
			else if (ext.sicmp(".dt") == 0)
				return "diet";
			else
				return null;
		}

		return languageId;
	}

	/// Creates a new D document at the given document URI, with version 0 and
	/// no text.
	this(DocumentUri uri)
	{
		this.uri = uri;
		languageId = "d";
		version_ = 0;
		text = null;
	}

	/// Creates a new document at the given document URI, with the given version
	/// and language and creates a copy of the text to use.
	this(TextDocumentItem doc)
	{
		uri = doc.uri;
		languageId = doc.languageId;
		version_ = doc.version_;
		text = doc.text.dup;
	}

	/// Creates a document with no URI and no language ID and copies the content
	/// into the text buffer using $(LREF setContent).
	static Document nullDocument(scope const(char)[] content)
	{
		Document ret;
		ret.setContent(content);
		return ret;
	}

	/// Returns an immutable snapshot of this document with its own copy of
	/// the text buffer.
	immutable(Document) clone()
	{
		Document ret = this;
		ret.text = text.dup;
		return cast(immutable) ret;
	}

	// Test helper: wraps the given buffer without copying it.
	version (unittest) private static Document nullDocumentOwnMemory(char[] content)
	{
		Document ret;
		ret.text = content;
		return ret;
	}

	/// Returns a read-only view of the text. The text may however be changed
	/// by other operations, so this slice should be used directly and not after
	/// any context yield or API call potentially modifying the data.
	const(char)[] rawText() const
	{
		return cast(const(char)[]) text;
	}

	/// ditto
	string rawText() immutable
	{
		return text;
	}

	/// Returns the length of the text in bytes (UTF-8 code units).
	size_t length() const @property
	{
		return text.length;
	}

	/// Sets the content of this document to the given content. Copies the data
	/// from newContent into this text buffer.
	///
	/// Should not be called as an API unless managing some kind of virtual
	/// document manually.
	void setContent(scope const(char)[] newContent)
	{
		if (newContent.length <= text.length)
		{
			// fits into the existing buffer: overwrite, then shrink
			text[0 .. newContent.length] = newContent;
			text.length = newContent.length;
		}
		else
		{
			// assumeSafeAppend lets the runtime grow the buffer in place when
			// there is spare capacity behind the current slice
			text = text.assumeSafeAppend;
			text.length = newContent.length;
			text = text.assumeSafeAppend;
			text[0 .. $] = newContent;
		}
	}

	/// Replaces the text inside `range` with `newContent`, reusing the
	/// existing buffer where possible.
	/// Params:
	///   range = start and end position of the text to replace. The two
	///     positions are swapped if they are given in reverse order.
	///   newContent = the replacement text, copied into the buffer.
	void applyChange(TextRange range, scope const(char)[] newContent)
	{
		import core.stdc.string : memmove;

		// clamp so that a truncated multi-byte sequence at the end of the
		// buffer can't produce an index past the end
		auto start = min(positionToBytes(range[0]), text.length);
		auto end = min(positionToBytes(range[1]), text.length);

		if (start > end)
			swap(start, end);

		if (start == 0 && end == text.length)
		{
			setContent(newContent);
			return;
		}

		// all arithmetic is done in size_t so large documents are not
		// truncated
		const oldLength = text.length;
		const tailLength = oldLength - end;
		const newLength = start + newContent.length + tailLength;

		text = text.assumeSafeAppend;
		if (newLength > oldLength)
			text.length = newLength;
		if (newLength != oldLength)
		{
			// move everything behind the replaced range to its new location
			// (regions may overlap, hence memmove)
			memmove(text.ptr + start + newContent.length, text.ptr + end, tailLength);
		}
		if (newLength < oldLength)
			text = text[0 .. newLength];
		text = text.assumeSafeAppend;

		text[start .. start + newContent.length] = newContent[];
	}

	/// Converts an LSP offset to a byte offset for using for example in array
	/// slicing.
	size_t offsetToBytes(size_t offset) const
	{
		return .countBytesUntilUTF16Index(text, offset);
	}

	/// Converts a byte offset to an LSP offset.
	/// Byte offsets past the end of the text are clamped to the end.
	size_t bytesToOffset(size_t bytes) const
	{
		return .countUTF16Length(text[0 .. min($, bytes)]);
	}

	/// Converts a line/column position to an LSP offset.
	/// Columns past the end of a line stop at its line break, lines past the
	/// end of the document result in the end of the document.
	size_t positionToOffset(Position position) const
	{
		size_t offset = 0;
		size_t bytes = 0;
		// skip whole lines, counting UTF-16 code units on the way
		while (bytes < text.length && position.line > 0)
		{
			const c = text.ptr[bytes];
			if (c == '\n')
				position.line--;
			utf16DecodeUtf8Length(c, offset, bytes);
		}

		// advance inside the line until the column is consumed
		while (bytes < text.length && position.character > 0)
		{
			const c = text.ptr[bytes];
			if (c == '\n')
				break;
			size_t utf16Size;
			utf16DecodeUtf8Length(c, utf16Size, bytes);
			if (utf16Size < position.character)
				position.character -= utf16Size;
			else
				position.character = 0;
			offset += utf16Size;
		}
		return offset;
	}

	/// Converts a line/column position to a byte offset.
	/// Columns past the end of a line stop at its line break, lines past the
	/// end of the document result in the end of the document.
	size_t positionToBytes(Position position) const
	{
		size_t index = 0;
		// continuation bytes never equal '\n', so lines can be skipped
		// byte by byte
		while (index < text.length && position.line > 0)
			if (text.ptr[index++] == '\n')
				position.line--;

		while (index < text.length && position.character > 0)
		{
			const c = text.ptr[index];
			if (c == '\n')
				break;
			size_t utf16Size;
			utf16DecodeUtf8Length(c, utf16Size, index);
			if (utf16Size < position.character)
				position.character -= utf16Size;
			else
				position.character = 0;
		}
		return index;
	}

	/// Converts an LSP offset to a line/column position.
	Position offsetToPosition(size_t offset) const
	{
		size_t bytes;
		size_t index;
		// -1 wraps around so that `lastNl + 1` below is 0 on the first line
		size_t lastNl = -1;

		Position ret;
		while (bytes < text.length && index < offset)
		{
			const c = text.ptr[bytes];
			if (c == '\n')
			{
				ret.line++;
				lastNl = index;
			}
			utf16DecodeUtf8Length(c, index, bytes);
		}
		const start = lastNl + 1;
		ret.character = cast(uint)(index - start);
		return ret;
	}

	/// Converts a byte offset to a line/column position.
	/// Byte offsets past the end of the text are clamped to the end.
	Position bytesToPosition(size_t bytes) const
	{
		if (bytes > text.length)
			bytes = text.length;
		auto part = text.ptr[0 .. bytes].representation;
		// -1 wraps around so that `lastNl + 1` below is 0 on the first line
		size_t lastNl = -1;
		Position ret;
		foreach (i; 0 .. bytes)
		{
			if (part.ptr[i] == '\n')
			{
				ret.line++;
				lastNl = i;
			}
		}
		ret.character = cast(uint)(cast(const(char)[]) part[lastNl + 1 .. $]).countUTF16Length;
		return ret;
	}

	/// Converts a line/column byte offset to a line/column position.
	/// Columns which lie behind the end of the line keep the number of bytes
	/// they over-extend by, added onto the UTF-16 length of the line.
	Position lineColumnBytesToPosition(uint line, uint column) const
	{
		scope lineText = lineAtScope(line);
		uint offset = 0;
		// keep over-extending positions
		if (column > lineText.length)
		{
			offset = column - cast(uint)lineText.length;
			column -= offset;
			assert(column <= lineText.length);
		}
		return Position(line, cast(uint) lineText[0 .. column].countUTF16Length + offset);
	}

	/// Returns the position at "end" starting from the given "src" position which is assumed to be at byte "start"
	/// Faster to quickly calculate nearby positions of known byte positions.
	/// Falls back to $(LREF bytesToPosition) if end is before start.
	Position movePositionBytes(Position src, size_t start, size_t end) const
	{
		if (end == start)
			return src;
		if (end < start)
			return bytesToPosition(end);

		auto t = text[min($, start) .. min($, end)];
		size_t bytes;
		while (bytes < t.length)
		{
			const c = t.ptr[bytes];
			if (c == '\n')
			{
				src.line++;
				src.character = 0;
				bytes++;
			}
			else
				utf16DecodeUtf8Length(c, src.character, bytes);
		}
		return src;
	}

	/// Same as $(LREF movePositionBytes), but additionally stores the
	/// resulting position in `src` and sets `start` to `end`, so it can be
	/// called repeatedly with increasing `end` values.
	Position nextPositionBytes(ref Position src, ref size_t start, size_t end) const
	{
		auto pos = movePositionBytes(src, start, end);
		src = pos;
		start = end;
		return pos;
	}

	/// Returns the word range at a given line/column position.
	TextRange wordRangeAt(Position position) const
	{
		auto chars = wordInLine(lineAtScope(position), position.character);
		return TextRange(Position(position.line, chars[0]), Position(position.line, chars[1]));
	}

	/// Returns the word range at a given byte position.
	size_t[2] wordRangeAt(size_t bytes) const
	{
		// lastIndexOf returns -1 if there is no previous line break, making
		// the line start at 0
		auto lineStart = text.lastIndexOf('\n', bytes) + 1;
		auto ret = wordInLineBytes(text[lineStart .. $], bytes - lineStart);
		ret[0] += lineStart;
		ret[1] += lineStart;
		return ret;
	}

	/// Returns a byte offset range as `[start, end]` of the given 0-based line
	/// number. The range includes the terminating line break if there is one.
	/// Returns `[0, 0]` if the line does not exist.
	size_t[2] lineByteRangeAt(uint line) const
	{
		size_t start = 0;
		size_t index = 0;
		while (line > 0 && index < text.length)
		{
			const c = text.ptr[index++];
			if (c == '\n')
			{
				line--;
				start = index;
			}
		}
		// if !found
		if (line != 0)
			return [0, 0];

		auto end = text.indexOf('\n', start);
		if (end == -1)
			end = text.length;
		else
			end++;

		return [start, end];
	}

	/// Returns the text of a line at the given position.
	string lineAt(Position position) const
	{
		return lineAt(position.line);
	}

	/// Returns the text of a line starting at line 0.
	string lineAt(uint line) const
	{
		return lineAtScope(line).idup;
	}

	/// Returns the line text which is only in this scope if text isn't modified
	/// See_Also: $(LREF lineAt)
	scope const(char)[] lineAtScope(Position position) const
	{
		return lineAtScope(position.line);
	}

	/// Returns the line text which is only in this scope if text isn't modified
	/// See_Also: $(LREF lineAt)
	scope const(char)[] lineAtScope(uint line) const
	{
		auto range = lineByteRangeAt(line);
		return text[range[0] .. range[1]];
	}

	unittest
	{
		void assertEqual(A, B)(A a, B b)
		{
			import std.conv : to;

			assert(a == b, a.to!string ~ " is not equal to " ~ b.to!string);
		}

		Document doc;
		doc.setContent(`abc
hellö world
how åre
you?`);
		assertEqual(doc.lineAt(Position(0, 0)), "abc\n");
		assertEqual(doc.lineAt(Position(0, 100)), "abc\n");
		assertEqual(doc.lineAt(Position(1, 3)), "hellö world\n");
		assertEqual(doc.lineAt(Position(2, 0)), "how åre\n");
		assertEqual(doc.lineAt(Position(3, 0)), "you?");
		assertEqual(doc.lineAt(Position(3, 8)), "you?");
		assertEqual(doc.lineAt(Position(4, 0)), "");
	}

	/// Returns how a line is terminated at the given 0-based line number.
	/// Returns `EolType.lf` if the line has no line break.
	EolType eolAt(int line) const
	{
		size_t index = 0;
		int curLine = 0;
		bool prevWasCr = false;
		while (index < text.length)
		{
			if (curLine > line)
				return EolType.lf;
			auto c = decode!(UseReplacementDchar.yes)(text, index);
			if (c == '\n')
			{
				if (curLine == line)
				{
					return prevWasCr ? EolType.crlf : EolType.lf;
				}
				curLine++;
			}
			prevWasCr = c == '\r';
		}
		return EolType.lf;
	}
}

/// Helper struct which should have one unique instance in the application which
/// processes document events sent by a LSP client to an LSP server and creates
/// an in-memory representation of all the files managed by the client.
struct TextDocumentManager
{
	/// Internal document storage. Only iterate over this using `foreach`, other
	/// operations are not considered officially supported.
	Document[] documentStore;

	/// Same as $(LREF tryGet) but throws an exception if the URI doesn't exist.
	ref Document opIndex(string uri)
	{
		auto idx = documentStore.countUntil!(a => a.uri == uri);
		if (idx == -1)
			throw new Exception("Document '" ~ uri ~ "' not found");
		return documentStore[idx];
	}

	/// Tries to get a document from a URI, returns Document.init if it is not
	/// in the in-memory cache / not sent by the client.
	Document tryGet(string uri)
	{
		auto idx = documentStore.countUntil!(a => a.uri == uri);
		if (idx == -1)
			return Document.init;
		return documentStore[idx];
	}

	/// Tries to load a given URI manually without having it received via LSP
	/// methods. Note that a LSP close method will unload this early.
	/// Returns: the created document
	/// Throws: FileException in case the file doesn't exist or other file
	///         system errors. In this case no new document should have been
	///         inserted yet.
	ref Document loadFromFilesystem(string uri)
	{
		import served.lsp.uri : uriToFile;
		import fs = std.file;

		string path = uriToFile(uri);
		// read before touching documentStore so a failed read inserts nothing
		auto content = fs.readText(path);

		auto index = documentStore.length++;
		documentStore[index].uri = uri;
		// -1 marks documents which were not opened by the client
		documentStore[index].version_ = -1;
		documentStore[index].setContent(content);
		return documentStore[index];
	}

	/// Unloads the given URI so it's no longer accessible. Note that this
	/// should only be done for documents loaded manually and never for LSP
	/// documents as it will break all features in that file until reopened.
	/// Returns: `true` if a document was removed, `false` if the URI was not
	/// loaded.
	bool unloadDocument(string uri)
	{
		auto idx = documentStore.countUntil!(a => a.uri == uri);
		if (idx == -1)
			return false;

		// swap-remove: order of documentStore is not preserved
		documentStore[idx] = documentStore[$ - 1];
		documentStore.length--;
		return true;
	}

	/// Returns the currently preferred syncKind to use with the client.
	/// Additionally always supports the `full` sync kind.
	static TextDocumentSyncKind syncKind()
	{
		return TextDocumentSyncKind.incremental;
	}

	/// Processes an LSP packet and performs the document update in-memory that
	/// is requested.
	/// Params:
	///   msg = The request sent by a client. This method only processes
	///     `textDocument/` messages which are relevant to file modification.
	/// Returns: `true` if the given method was handled, `false` otherwise.
	bool process(RequestMessage msg)
	{
		if (msg.method == "textDocument/didOpen")
		{
			auto params = msg.params.fromJSON!DidOpenTextDocumentParams;
			// The URI may already be present, e.g. because it was loaded
			// using loadFromFilesystem. Lookups return the first match, so
			// appending would leave the stale document visible; replace it
			// instead.
			auto existing = documentStore.countUntil!(a => a.uri == params.textDocument.uri);
			if (existing == -1)
				documentStore ~= Document(params.textDocument);
			else
				documentStore[existing] = Document(params.textDocument);
			return true;
		}
		else if (msg.method == "textDocument/didClose")
		{
			auto targetUri = msg.params["textDocument"]["uri"].str;
			if (!unloadDocument(targetUri))
			{
				warning("Received didClose notification for URI not in system: ", targetUri);
				warning("This can be a potential memory leak if it was previously opened under a different name.");
			}
			return true;
		}
		else if (msg.method == "textDocument/didChange")
		{
			auto targetUri = msg.params["textDocument"]["uri"].str;
			auto idx = documentStore.countUntil!(a => a.uri == targetUri);
			if (idx >= 0)
			{
				documentStore[idx].version_ = msg.params["textDocument"]["version"].integer;
				foreach (change; msg.params["contentChanges"].array)
				{
					// changes with a range are incremental, changes without
					// one replace the whole document
					if (auto rangePtr = "range" in change)
					{
						auto range = *rangePtr;
						TextRange textRange = cast(Position[2])[
							range["start"].fromJSON!Position, range["end"].fromJSON!Position
						];
						documentStore[idx].applyChange(textRange, change["text"].str);
					}
					else
						documentStore[idx].setContent(change["text"].str);
				}
			}
			return true;
		}
		return false;
	}
}

/// Helper structure for storing any data of type T on a per-file basis.
struct PerDocumentCache(T)
{
	/// A cached value together with the document it was stored for.
	struct Entry
	{
		Document document;
		T data;
	}

	/// All cache entries, at most one per document URI when filled using
	/// $(LREF store).
	Entry[] entries;

	/// Returns the data stored for the given URI if it was stored for a
	/// document version which is at least as new as the version currently
	/// held by `source`. Returns `T.init` otherwise.
	T cached(ref TextDocumentManager source, string uri)
	{
		auto newest = source.tryGet(uri);
		// by ref: avoids copying every entry (including its data) on lookup
		foreach (ref entry; entries)
		{
			if (entry.document.uri == uri)
			{
				return entry.document.version_ >= newest.version_
					? entry.data : T.init;
			}
		}
		return T.init;
	}

	/// Stores data for the given document. An existing entry for the same
	/// URI is only overwritten if the given document is not older than the
	/// one it was stored for.
	void store(Document document, T data)
	{
		foreach (ref entry; entries)
		{
			if (entry.document.uri == document.uri)
			{
				if (document.version_ >= entry.document.version_)
				{
					entry.document = document;
					entry.data = data;
				}
				return;
			}
		}
		entries ~= Entry(document, data);
	}
}

/// Returns a range of the identifier/word at the given position.
/// This overload counts `character` and the result in UTF-16 code units.
uint[2] wordInLine(const(char)[] line, uint character)
{
	return wordInLineImpl!(wchar, uint)(line, character);
}

/// ditto
/// This overload counts `bytes` and the result in UTF-8 code units.
size_t[2] wordInLineBytes(const(char)[] line, size_t bytes)
{
	return wordInLineImpl!(char, size_t)(line, bytes);
}

/// Implementation of $(LREF wordInLine) and $(LREF wordInLineBytes).
/// Params:
///   CharT = character type whose code units are used to count columns.
///   SizeT = integer type of the column and the result.
///   line = the text to search in.
///   character = the column to look for a word at.
/// Returns: `[start, end]` columns, both clamped to `line.length` with
/// `end >= start`.
SizeT[2] wordInLineImpl(CharT, SizeT)(const(char)[] line, SizeT character)
{
	size_t index = 0;
	SizeT offs = 0;

	SizeT lastStart = character;
	SizeT start = character, end = character + 1;
	bool searchStart = true;

	while (index < line.length)
	{
		const c = decode(line, index);
		const l = cast(SizeT) c.codeLength!CharT;

		if (searchStart)
		{
			// remember the column after the last separator before `character`
			if (isDIdentifierSeparatingChar(c))
				lastStart = offs + l;

			if (offs + l >= character)
			{
				start = lastStart;
				searchStart = false;
			}

			offs += l;
		}
		else
		{
			// extend the word until the next separator
			end = offs;
			offs += l;
			if (isDIdentifierSeparatingChar(c))
				break;
		}
	}

	if (start > line.length)
		start = cast(SizeT)line.length;
	if (end > line.length)
		end = cast(SizeT)line.length;
	if (end < start)
		end = start;

	return [start, end];
}

deprecated("use isDIdentifierSeparatingChar instead")
alias isIdentifierSeparatingChar = isDIdentifierSeparatingChar;

/// Returns `true` if the given character can not be part of a D identifier
/// and thus separates words: every ASCII character except digits, letters and
/// `_`, as well as the unicode line and paragraph separators.
bool isDIdentifierSeparatingChar(dchar c)
{
	return c < '0' || (c > '9' && c < 'A')
		|| (c > 'Z' && c < '_') // [ \ ] ^
		|| c == '`' || (c > 'z' && c < 128)
		|| c == '\u2028' || c == '\u2029'; // line separators
}

/// Returns `true` if the given string is not empty, does not start with a
/// digit and does not contain any identifier separating characters.
bool isValidDIdentifier(const(char)[] s)
{
	import std.ascii : isDigit;

	return s.length && !s[0].isDigit && !s.any!isDIdentifierSeparatingChar;
}

unittest
{
	assert(!isValidDIdentifier(""));
	assert(!isValidDIdentifier("0"));
	assert(!isValidDIdentifier("10"));
	assert(!isValidDIdentifier("1a"));
	assert(!isValidDIdentifier("a^b"));
	assert(isValidDIdentifier("_"));
	assert(isValidDIdentifier("a"));
	assert(isValidDIdentifier("__helloWorld123"));
}

unittest
{
	Document doc;
	doc.text.reserve(16);
	auto ptr = doc.text.ptr;
	assert(doc.rawText.length == 0);
	doc.setContent("Hello world");
	assert(doc.rawText == "Hello world");
	doc.setContent("foo");
	assert(doc.rawText == "foo");
	doc.setContent("foo bar baz baf");
	assert(doc.rawText == "foo bar baz baf");
	doc.applyChange(TextRange(0, 4, 0, 8), "");
	assert(doc.rawText == "foo baz baf");
	doc.applyChange(TextRange(0, 4, 0, 8), "bad");
	assert(doc.rawText == "foo badbaf");
	doc.applyChange(TextRange(0, 4, 0, 8), "bath");
	assert(doc.rawText == "foo bathaf");
	doc.applyChange(TextRange(0, 4, 0, 10), "bath");
	assert(doc.rawText == "foo bath");
	doc.applyChange(TextRange(0, 0, 0, 8), "bath");
	assert(doc.rawText == "bath");
	doc.applyChange(TextRange(0, 0, 0, 1), "par");
	assert(doc.rawText == "parath", doc.rawText);
	doc.applyChange(TextRange(0, 0, 0, 4), "");
	assert(doc.rawText == "th");
	doc.applyChange(TextRange(0, 2, 0, 2), "e");
	assert(doc.rawText == "the");
	doc.applyChange(TextRange(0, 0, 0, 0), "in");
	assert(doc.rawText == "inthe");
	// all changes must have happened inside the reserved buffer
	assert(ptr is doc.text.ptr);
}

// Advances both indices over the UTF-8 sequence which starts with the lead
// byte `c`: utf8Index by the sequence's byte count, utf16Index by the number
// of UTF-16 code units the encoded character takes.
pragma(inline, true) private void utf16DecodeUtf8Length(A, B)(char c, ref A utf16Index,
		ref B utf8Index) @safe nothrow @nogc
{
	switch (c & 0b1111_0000)
	{
	case 0b1110_0000:
		// assume valid encoding (no wrong surrogates)
		utf16Index++;
		utf8Index += 3;
		break;
	case 0b1111_0000:
		// 4 byte sequences are encoded as surrogate pair in UTF-16
		utf16Index += 2;
		utf8Index += 4;
		break;
	case 0b1100_0000:
	case 0b1101_0000:
		utf16Index++;
		utf8Index += 2;
		break;
	default:
		utf16Index++;
		utf8Index++;
		break;
	}
}

/// Returns the number of UTF-16 code units needed to encode the given UTF-8
/// text, without validating it.
pragma(inline, true) size_t countUTF16Length(scope const(char)[] text) @safe nothrow @nogc
{
	size_t offset;
	size_t index;
	while (index < text.length)
	{
		const c = (() @trusted => text.ptr[index++])();
		// every byte which is not a continuation byte (0b10xx_xxxx) starts
		// a character
		if (cast(byte)c >= -0x40) offset++;
		// lead bytes of 4 byte sequences take a second UTF-16 code unit
		if (c >= 0xf0) offset++;
	}
	return offset;
}

/// Returns the byte index in the given UTF-8 text which corresponds to the
/// given UTF-16 offset. Offsets past the end of the text result in
/// `text.length`, the result never points at a continuation byte.
pragma(inline, true) size_t countBytesUntilUTF16Index(scope const(char)[] text, size_t utf16Offset) @safe nothrow @nogc
{
	size_t bytes;
	size_t offset;
	while (offset < utf16Offset && bytes < text.length)
	{
		char c = (() @trusted => text.ptr[bytes++])();
		if (cast(byte)c >= -0x40) offset++;
		if (c >= 0xf0) offset++;
	}
	// skip over the continuation bytes of the last counted character
	while (bytes < text.length)
	{
		char c = (() @trusted => text.ptr[bytes])();
		if (cast(byte)c >= -0x40) break;
		bytes++;
	}
	return bytes;
}

version (unittest)
{
	import core.time;

	// NOTE(review): the whitespace inside this fixture was reconstructed (tabs
	// for indentation), which reproduces the checkpoint constants below up to
	// and including line 31. The EOF constants need one additional 1-byte
	// character after that; it is assumed to be a trailing space behind
	// `“purple”` and spelled out as an escape so it can't get lost - confirm
	// against the repository copy.
	Document testUnicodeDocument = Document.nullDocumentOwnMemory(cast(char[])(`///
/// Copyright © 2020 Somebody (not actually™) x3
///
module some.file;

enum Food : int
{
	pizza = '\U0001F355', // 🍕
	burger = '\U0001F354', // 🍔
	chicken = '\U0001F357', // 🍗
	taco = '\U0001F32E', // 🌮
	wrap = '\U0001F32F', // 🌯
	salad = '\U0001F957', // 🥗
	pasta = '\U0001F35D', // 🍝
	sushi = '\U0001F363', // 🍣
	oden = '\U0001F362', // 🍢
	egg = '\U0001F373', // 🍳
	croissant = '\U0001F950', // 🥐
	baguette = '\U0001F956', // 🥖
	popcorn = '\U0001F37F', // 🍿
	coffee = '\u2615', // ☕
	cookie = '\U0001F36A', // 🍪
}

void main() {
	// taken from https://github.com/DlangRen/Programming-in-D/blob/master/ddili/src/ders/d.cn/aa.d
	int[string] colorCodes = [ /* ... */ ];

	if ("purple" in colorCodes) {
		// ü®™🍳键 “purple” 在表中

	} else { // line 31
		//表中不存在 键 “purple”` ~ " \n\t}\n\n\tstring x;\n}"));

	enum testSOF_byte = 0;
	enum testSOF_offset = 0;
	enum testSOF_position = Position(0, 0);

	enum testEOF_byte = 872;
	enum testEOF_offset = 805;
	enum testEOF_position = Position(36, 1);

	// in line before unicode
	enum testLinePreUni_byte = 757;
	enum testLinePreUni_offset = 724;
	enum testLinePreUni_position = Position(29, 4); // after `//`

	// in line after unicode
	enum testLinePostUni_byte = 789;
	enum testLinePostUni_offset = 742;
	enum testLinePostUni_position = Position(29, 22); // after `purple” 在`

	// ascii line after unicode line
	enum testMidAsciiLine_byte = 804;
	enum testMidAsciiLine_offset = 753;
	enum testMidAsciiLine_position = Position(31, 7);

	@("{offset, bytes, position} -> {offset, bytes, position}")
	unittest
	{
		import std.conv;
		import std.stdio;

		static foreach (test; [
				"SOF", "EOF", "LinePreUni", "LinePostUni", "MidAsciiLine"
			])
		{
			{
				enum testOffset = mixin("test" ~ test ~ "_offset");
				enum testByte = mixin("test" ~ test ~ "_byte");
				enum testPosition = mixin("test" ~ test ~ "_position");

				writeln(" === Test ", test, " ===");

				writeln(testByte, " byte -> offset ", testOffset);
				assert(testUnicodeDocument.bytesToOffset(testByte) == testOffset,
						"fail " ~ test ~ " byte->offset = " ~ testUnicodeDocument.bytesToOffset(testByte)
						.to!string);
				writeln(testByte, " byte -> position ", testPosition);
				assert(testUnicodeDocument.bytesToPosition(testByte) == testPosition,
						"fail " ~ test ~ " byte->position = " ~ testUnicodeDocument.bytesToPosition(testByte)
						.to!string);

				writeln(testOffset, " offset -> byte ", testByte);
				assert(testUnicodeDocument.offsetToBytes(testOffset) == testByte,
						"fail " ~ test ~ " offset->byte = " ~ testUnicodeDocument.offsetToBytes(testOffset)
						.to!string);
				writeln(testOffset, " offset -> position ", testPosition);
				assert(testUnicodeDocument.offsetToPosition(testOffset) == testPosition,
						"fail " ~ test ~ " offset->position = " ~ testUnicodeDocument.offsetToPosition(testOffset)
						.to!string);

				writeln(testPosition, " position -> offset ", testOffset);
				assert(testUnicodeDocument.positionToOffset(testPosition) == testOffset,
						"fail " ~ test ~ " position->offset = " ~ testUnicodeDocument.positionToOffset(testPosition)
						.to!string);
				writeln(testPosition, " position -> byte ", testByte);
				assert(testUnicodeDocument.positionToBytes(testPosition) == testByte,
						"fail " ~ test ~ " position->byte = " ~ testUnicodeDocument.positionToBytes(testPosition)
						.to!string);

				writeln();
			}
		}

		// out-of-range inputs must clamp to the end of the document
		const size_t maxBytes = testEOF_byte;
		const size_t maxOffset = testEOF_offset;
		const Position maxPosition = testEOF_position;

		writeln("max offset -> byte");
		assert(testUnicodeDocument.offsetToBytes(size_t.max) == maxBytes);
		writeln("max offset -> position");
		assert(testUnicodeDocument.offsetToPosition(size_t.max) == maxPosition);
		writeln("max byte -> offset");
		assert(testUnicodeDocument.bytesToOffset(size_t.max) == maxOffset);
		writeln("max byte -> position");
		assert(testUnicodeDocument.bytesToPosition(size_t.max) == maxPosition);
		writeln("max position -> offset");
		assert(testUnicodeDocument.positionToOffset(Position(uint.max, uint.max)) == maxOffset);
		writeln("max position -> byte");
		assert(testUnicodeDocument.positionToBytes(Position(uint.max, uint.max)) == maxBytes);
	}

	// disabled by default, change to `version (all)` to run the benchmarks
	version (none)
	@("character transform benchmarks")
	unittest
	{
		import std.datetime.stopwatch;
		import std.random;
		import std.stdio;

		enum PositionCount = 32;
		size_t[PositionCount] testBytes;
		size_t[PositionCount] testOffsets;
		Position[PositionCount] testPositions;

		static immutable funs = [
			"offsetToBytes", "offsetToPosition", "bytesToOffset", "bytesToPosition",
			"positionToOffset", "positionToBytes"
		];

		// accumulates all results so the calls can't be optimized away
		size_t debugSum;

		size_t lengthUtf16 = testUnicodeDocument.text.codeLength!wchar;
		enum TestRepeats = 10;
		Duration[TestRepeats][funs.length] times;

		StopWatch sw;
		static foreach (iterations; [
				1e3, 1e4, /* 1e5 */
			])
		{
			writeln("==================");
			writeln("Timing ", iterations, "x", PositionCount, "x", TestRepeats, " iterations:");
			foreach (ref row; times)
				foreach (ref col; row)
					col = Duration.zero;

			static foreach (t; 0 .. TestRepeats)
			{
				// pick fresh random locations for every repetition
				foreach (i, ref v; testOffsets)
				{
					v = uniform(0, lengthUtf16);
					testBytes[i] = testUnicodeDocument.offsetToBytes(v);
					testPositions[i] = testUnicodeDocument.offsetToPosition(v);
				}
				static foreach (fi, fun; funs)
				{
					sw.reset();
					sw.start();
					foreach (i; 0 .. iterations)
					{
						foreach (v; 0 .. PositionCount)
						{
							// the first letter of the function name tells
							// which kind of input it takes
							static if (fun[0] == 'b')
								mixin("debugSum |= testUnicodeDocument." ~ fun ~ "(testBytes[v]).sumVal;");
							else static if (fun[0] == 'o')
								mixin("debugSum |= testUnicodeDocument." ~ fun ~ "(testOffsets[v]).sumVal;");
							else static if (fun[0] == 'p')
								mixin("debugSum |= testUnicodeDocument." ~ fun ~ "(testPositions[v]).sumVal;");
							else
								static assert(false);
						}
					}
					sw.stop();
					times[fi][t] = sw.peek;
				}
			}
			static foreach (fi, fun; funs)
			{
				writeln(fun, ": ", formatDurationDistribution(times[fi]));
			}
			writeln();
			writeln();
		}

		writeln("tricking the optimizer", debugSum);
	}

	// Folds a benchmark result into a number for the debugSum accumulator.
	private pragma(inline, true) size_t sumVal(size_t v) pure @safe nothrow @nogc
	{
		return v;
	}

	// ditto - reinterprets the first 8 bytes of the Position as an integer.
	// NOTE(review): assumes Position is at least 8 bytes large - confirm.
	private pragma(inline, true) size_t sumVal(Position v) pure @trusted nothrow @nogc
	{
		return cast(size_t)*(cast(ulong*)&v);
	}

	// Formats average, median and (population) standard deviation of the
	// given durations in milliseconds.
	private string formatDurationDistribution(size_t n)(Duration[n] durs)
	{
		import std.algorithm : fold, map, sort, sum;
		import std.format : format;
		import std.math : sqrt;

		Duration total = durs[].fold!"a+b";
		sort!"a<b"(durs[]);
		double msAvg = cast(double) total.total!"hnsecs" / 10_000.0 / n;
		double msMedian = cast(double) durs[$ / 2].total!"hnsecs" / 10_000.0;
		double[n] diffs = 0;
		foreach (i, dur; durs)
			diffs[i] = (cast(double) dur.total!"hnsecs" / 10_000.0) - msAvg;
		// standard deviation = sqrt of the mean (not the sum) of squared
		// deviations
		double msStdDeviation = sqrt(diffs[].map!"a*a".sum / n);
		return format!"[avg=%.4fms, median=%.4f, sd=%.4f]"(msAvg, msMedian, msStdDeviation);
	}
}