module served.lsp.textdocumentmanager;

import std.algorithm;
import std.experimental.logger;
import std.json;
import std.string;
import std.utf : codeLength, decode, UseReplacementDchar;

import served.lsp.jsonrpc;
import served.lsp.protocol;

import painlessjson;

/// in-memory representation of a file at any given URI. Not thread-safe.
struct Document
{
	/// The URI of this document. Should not be changed.
	DocumentUri uri;
	/// The language ID as reported by the client. Should not be changed.
	string languageId;
	/// The document version as reported by the client. Should not be changed.
	long version_;
	private char[] text;

	/// Returns the stored language ID. If none is stored, a language ID is
	/// guessed from the (case-insensitive) file extension of the URI.
	/// Returns: the language ID or `null` if none is stored and the extension
	/// is not one of the known ones.
	string getLanguageId() const @property @trusted @nogc nothrow
	{
		if (!languageId.length)
		{
			import std.path : extension;
			import std.uni : sicmp;

			const ext = uri.extension;
			if (ext.sicmp(".d") == 0)
				return "d";
			else if (ext.sicmp(".dpp") == 0)
				return "dpp";
			else if (ext.sicmp(".ds") == 0 || ext.sicmp(".dscript") == 0)
				return "dscript";
			else if (ext.sicmp(".dml") == 0)
				return "dml";
			else if (ext.sicmp(".sdl") == 0)
				return "sdl";
			else if (ext.sicmp(".dt") == 0)
				return "diet";
			else
				return null;
		}

		return languageId;
	}

	/// Creates a new D document at the given document URI, with version 0 and
	/// no text.
	this(DocumentUri uri)
	{
		this.uri = uri;
		languageId = "d";
		version_ = 0;
		text = null;
	}

	/// Creates a new document at the given document URI, with the given version
	/// and language and creates a copy of the text to use.
	this(TextDocumentItem doc)
	{
		uri = doc.uri;
		languageId = doc.languageId;
		version_ = doc.version_;
		text = doc.text.dup;
	}

	/// Creates a document with no URI and no language ID and copies the content
	/// into the text buffer using $(LREF setContent).
	static Document nullDocument(scope const(char)[] content)
	{
		Document ret;
		ret.setContent(content);
		return ret;
	}

	/// Returns an immutable copy of this document which owns a duplicate of
	/// the text buffer, so later edits to this document don't affect it.
	immutable(Document) clone()
	{
		Document ret = this;
		ret.text = text.dup;
		return cast(immutable) ret;
	}

	// test helper: adopts `content` as text buffer without copying it.
	version (unittest) private static Document nullDocumentOwnMemory(char[] content)
	{
		Document ret;
		ret.text = content;
		return ret;
	}

	/// Returns a read-only view of the text. The text may however be changed
	/// by other operations, so this slice should be used directly and not after
	/// any context yield or API call potentially modifying the data.
	const(char)[] rawText() const
	{
		return cast(const(char)[]) text;
	}

	/// ditto
	string rawText() immutable
	{
		return text;
	}

	/// Returns the length of the text in bytes.
	size_t length() const @property
	{
		return text.length;
	}

	/// Sets the content of this document to the given content. Copies the data
	/// from newContent into this text buffer.
	///
	/// Should not be called as an API unless managing some kind of virtual
	/// document manually.
	void setContent(scope const(char)[] newContent)
	{
		if (newContent.length <= text.length)
		{
			// fits into the existing buffer: overwrite, then shrink
			text[0 .. newContent.length] = newContent;
			text.length = newContent.length;
		}
		else
		{
			// grow in place if the underlying allocation permits it
			text = text.assumeSafeAppend;
			text.length = newContent.length;
			text = text.assumeSafeAppend;
			text[0 .. $] = newContent;
		}
	}

	/// Replaces the text inside `range` with `newContent`, shifting the text
	/// after the range as needed. The range is converted using
	/// $(LREF positionToBytes); a reversed range is normalized. Replacing the
	/// whole text is forwarded to $(LREF setContent).
	void applyChange(TextRange range, scope const(char)[] newContent)
	{
		auto start = positionToBytes(range[0]);
		auto end = positionToBytes(range[1]);

		if (start > end)
			swap(start, end);

		if (start == 0 && end == text.length)
		{
			setContent(newContent);
			return;
		}

		// all arithmetic is done in size_t, so large buffers are not truncated
		const size_t removed = end - start;
		const size_t oldLength = text.length;

		text = text.assumeSafeAppend;
		if (newContent.length > removed)
		{
			const size_t grow = newContent.length - removed;
			text.length = oldLength + grow;
			// move the tail to the right, back to front because source and
			// destination overlap
			for (size_t i = text.length; i > end + grow; i--)
				text[i - 1] = text[i - 1 - grow];
		}
		else if (newContent.length < removed)
		{
			const size_t shrink = removed - newContent.length;
			// move the tail to the left, front to back
			for (size_t i = start + newContent.length; i < oldLength - shrink; i++)
				text[i] = text[i + shrink];

			text = text[0 .. oldLength - shrink];
		}
		text = text.assumeSafeAppend;

		foreach (i, c; newContent)
			text[start + i] = c;
	}

	/// Converts an LSP offset to a byte offset for using for example in array
	/// slicing.
	size_t offsetToBytes(size_t offset) const
	{
		return .countBytesUntilUTF16Index(text, offset);
	}

	/// Converts a byte offset to an LSP offset.
	size_t bytesToOffset(size_t bytes) const
	{
		return .countUTF16Length(text[0 .. min($, bytes)]);
	}

	/// Converts a line/column position to an LSP offset.
	size_t positionToOffset(Position position) const
	{
		size_t offset = 0;
		size_t bytes = 0;
		// skip whole lines
		while (bytes < text.length && position.line > 0)
		{
			const c = text.ptr[bytes];
			if (c == '\n')
				position.line--;
			utf16DecodeUtf8Length(c, offset, bytes);
		}

		// advance inside the line, never past its line break
		while (bytes < text.length && position.character > 0)
		{
			const c = text.ptr[bytes];
			if (c == '\n')
				break;
			size_t utf16Size;
			utf16DecodeUtf8Length(c, utf16Size, bytes);
			if (utf16Size < position.character)
				position.character -= utf16Size;
			else
				position.character = 0;
			offset += utf16Size;
		}
		return offset;
	}

	/// Converts a line/column position to a byte offset.
	/// Returns: a byte index which is never larger than the text length.
	size_t positionToBytes(Position position) const
	{
		size_t index = 0;
		while (index < text.length && position.line > 0)
			if (text.ptr[index++] == '\n')
				position.line--;

		while (index < text.length && position.character > 0)
		{
			const c = text.ptr[index];
			if (c == '\n')
				break;
			size_t utf16Size;
			utf16DecodeUtf8Length(c, utf16Size, index);
			if (utf16Size < position.character)
				position.character -= utf16Size;
			else
				position.character = 0;
		}
		// the index is advanced by the length announced by the lead byte, a
		// truncated sequence at the end would otherwise point past the buffer
		return min(index, text.length);
	}

	/// Converts an LSP offset to a line/column position.
	Position offsetToPosition(size_t offset) const
	{
		size_t bytes;
		size_t index;
		// size_t.max, so that `lastNl + 1` wraps around to 0 on the first line
		size_t lastNl = -1;

		Position ret;
		while (bytes < text.length && index < offset)
		{
			const c = text.ptr[bytes];
			if (c == '\n')
			{
				ret.line++;
				lastNl = index;
			}
			utf16DecodeUtf8Length(c, index, bytes);
		}
		const start = lastNl + 1;
		ret.character = cast(uint)(index - start);
		return ret;
	}

	/// Converts a byte offset to a line/column position.
	Position bytesToPosition(size_t bytes) const
	{
		if (bytes > text.length)
			bytes = text.length;
		auto part = text.ptr[0 .. bytes].representation;
		// size_t.max, so that `lastNl + 1` wraps around to 0 on the first line
		size_t lastNl = -1;
		Position ret;
		foreach (i; 0 .. bytes)
		{
			if (part.ptr[i] == '\n')
			{
				ret.line++;
				lastNl = i;
			}
		}
		ret.character = cast(uint)(cast(const(char)[]) part[lastNl + 1 .. $]).countUTF16Length;
		return ret;
	}

	/// Converts a line/column byte offset to a line/column position.
	Position lineColumnBytesToPosition(uint line, uint column) const
	{
		scope lineText = lineAtScope(line);
		uint offset = 0;
		// keep over-extending positions
		if (column > lineText.length)
		{
			offset = column - cast(uint)lineText.length;
			column -= offset;
			assert(column <= lineText.length);
		}
		return Position(line, cast(uint) lineText[0 .. column].countUTF16Length + offset);
	}

	/// Returns the position at "end" starting from the given "src" position which is assumed to be at byte "start"
	/// Faster to quickly calculate nearby positions of known byte positions.
	/// Falls back to $(LREF bytesToPosition) if end is before start.
	Position movePositionBytes(Position src, size_t start, size_t end) const
	{
		if (end == start)
			return src;
		if (end < start)
			return bytesToPosition(end);

		auto t = text[min($, start) .. min($, end)];
		size_t bytes;
		while (bytes < t.length)
		{
			const c = t.ptr[bytes];
			if (c == '\n')
			{
				src.line++;
				src.character = 0;
				bytes++;
			}
			else
				utf16DecodeUtf8Length(c, src.character, bytes);
		}
		return src;
	}

	/// Same as $(LREF movePositionBytes), but also stores the resulting
	/// position in `src` and sets `start` to `end`, so it can be called
	/// repeatedly with increasing `end` values.
	Position nextPositionBytes(ref Position src, ref size_t start, size_t end) const
	{
		auto pos = movePositionBytes(src, start, end);
		src = pos;
		start = end;
		return pos;
	}

	/// Returns the word range at a given line/column position.
	TextRange wordRangeAt(Position position) const
	{
		auto chars = wordInLine(lineAtScope(position), position.character);
		return TextRange(Position(position.line, chars[0]), Position(position.line, chars[1]));
	}

	/// Returns the word range at a given byte position.
	size_t[2] wordRangeAt(size_t bytes) const
	{
		auto lineStart = text.lastIndexOf('\n', bytes) + 1;
		// wordInLineBytes takes a size_t, no narrowing needed
		auto ret = wordInLineBytes(text[lineStart .. $], bytes - lineStart);
		ret[0] += lineStart;
		ret[1] += lineStart;
		return ret;
	}

	/// Returns a byte offset range as `[start, end]` of the given 0-based line
	/// number. The end index is placed after the terminating `\n` if the line
	/// has one. Returns `[0, 0]` if the line doesn't exist.
	size_t[2] lineByteRangeAt(uint line) const
	{
		size_t start = 0;
		size_t index = 0;
		while (line > 0 && index < text.length)
		{
			const c = text.ptr[index++];
			if (c == '\n')
			{
				line--;
				start = index;
			}
		}
		// if !found
		if (line != 0)
			return [0, 0];

		auto end = text.indexOf('\n', start);
		if (end == -1)
			end = text.length;
		else
			end++;

		return [start, end];
	}

	/// Returns the text of a line at the given position.
	string lineAt(Position position) const
	{
		return lineAt(position.line);
	}

	/// Returns the text of a line starting at line 0.
	string lineAt(uint line) const
	{
		return lineAtScope(line).idup;
	}

	/// Returns the line text which is only in this scope if text isn't modified
	/// See_Also: $(LREF lineAt)
	scope const(char)[] lineAtScope(Position position) const
	{
		return lineAtScope(position.line);
	}

	/// Returns the line text which is only in this scope if text isn't modified
	/// See_Also: $(LREF lineAt)
	scope const(char)[] lineAtScope(uint line) const
	{
		auto range = lineByteRangeAt(line);
		return text[range[0] .. range[1]];
	}

	unittest
	{
		void assertEqual(A, B)(A a, B b)
		{
			import std.conv : to;

			assert(a == b, a.to!string ~ " is not equal to " ~ b.to!string);
		}

		Document doc;
		doc.setContent(`abc
hellö world
how åre
you?`);
		assertEqual(doc.lineAt(Position(0, 0)), "abc\n");
		assertEqual(doc.lineAt(Position(0, 100)), "abc\n");
		assertEqual(doc.lineAt(Position(1, 3)), "hellö world\n");
		assertEqual(doc.lineAt(Position(2, 0)), "how åre\n");
		assertEqual(doc.lineAt(Position(3, 0)), "you?");
		assertEqual(doc.lineAt(Position(3, 8)), "you?");
		assertEqual(doc.lineAt(Position(4, 0)), "");
	}

	/// Returns how a line is terminated at the given 0-based line number.
	/// Falls back to `EolType.lf` if the line has no terminator or doesn't
	/// exist.
	EolType eolAt(int line) const
	{
		size_t index = 0;
		int curLine = 0;
		bool prevWasCr = false;
		while (index < text.length)
		{
			if (curLine > line)
				return EolType.lf;
			auto c = decode!(UseReplacementDchar.yes)(text, index);
			if (c == '\n')
			{
				if (curLine == line)
				{
					return prevWasCr ? EolType.crlf : EolType.lf;
				}
				curLine++;
			}
			prevWasCr = c == '\r';
		}
		return EolType.lf;
	}
}

/// Helper struct which should have one unique instance in the application which
/// processes document events sent by a LSP client to an LSP server and creates
/// an in-memory representation of all the files managed by the client.
struct TextDocumentManager
{
	/// Internal document storage. Only iterate over this using `foreach`, other
	/// operations are not considered officially supported.
	Document[] documentStore;

	/// Same as $(LREF tryGet) but throws an exception if the URI doesn't exist.
	ref Document opIndex(string uri)
	{
		auto idx = documentStore.countUntil!(a => a.uri == uri);
		if (idx == -1)
			throw new Exception("Document '" ~ uri ~ "' not found");
		return documentStore[idx];
	}

	/// Tries to get a document from a URI, returns Document.init if it is not
	/// in the in-memory cache / not sent by the client.
	Document tryGet(string uri)
	{
		auto idx = documentStore.countUntil!(a => a.uri == uri);
		if (idx == -1)
			return Document.init;
		return documentStore[idx];
	}

	/// Tries to load a given URI manually without having it received via LSP
	/// methods. Note that a LSP close method will unload this early.
	/// Returns: the created document
	/// Throws: FileException in case the file doesn't exist or other file
	///         system errors. In this case no new document should have been
	///         inserted yet.
	ref Document loadFromFilesystem(string uri)
	{
		import served.lsp.uri : uriToFile;
		import fs = std.file;

		string path = uriToFile(uri);
		// read before touching the store, so a failed read inserts nothing
		auto content = fs.readText(path);

		auto index = documentStore.length++;
		documentStore[index].uri = uri;
		documentStore[index].version_ = -1;
		documentStore[index].setContent(content);
		return documentStore[index];
	}

	/// Returns the document for the given URI from the in-memory cache or, if
	/// it is not in there, loads it using $(LREF loadFromFilesystem).
	/// Throws: FileException in case the file doesn't exist or other file
	///         system errors. In this case no new document should have been
	///         inserted yet.
	ref Document getOrFromFilesystem(string uri)
	{
		auto idx = documentStore.countUntil!(a => a.uri == uri);
		if (idx == -1)
			return loadFromFilesystem(uri);
		else
			return documentStore[idx];
	}

	/// Unloads the given URI so it's no longer accessible. Note that this
	/// should only be done for documents loaded manually and never for LSP
	/// documents as it will break all features in that file until reopened.
	/// Returns: `true` if a document was removed, `false` if the URI was not
	/// in the store.
	bool unloadDocument(string uri)
	{
		auto idx = documentStore.countUntil!(a => a.uri == uri);
		if (idx == -1)
			return false;

		// order is not preserved: the last document takes the freed slot
		documentStore[idx] = documentStore[$ - 1];
		documentStore.length--;
		return true;
	}

	/// Returns the currently preferred syncKind to use with the client.
	/// Additionally always supports the `full` sync kind.
	static TextDocumentSyncKind syncKind()
	{
		return TextDocumentSyncKind.incremental;
	}

	/// Stores the given document, replacing an already stored document with
	/// the same URI instead of adding a second entry for it.
	private void insertOrUpdate(Document document)
	{
		auto idx = documentStore.countUntil!(a => a.uri == document.uri);
		if (idx == -1)
			documentStore ~= document;
		else
			documentStore[idx] = document;
	}

	/// Processes an LSP packet and performs the document update in-memory that
	/// is requested.
	/// Params:
	///   msg = The request sent by a client. This method only processes
	///     `textDocument/` messages which are relevant to file modification.
	/// Returns: `true` if the given method was handled, `false` otherwise.
	bool process(RequestMessage msg)
	{
		if (msg.method == "textDocument/didOpen")
		{
			auto params = msg.params.fromJSON!DidOpenTextDocumentParams;
			// the URI may already be stored, for example when it was loaded
			// through loadFromFilesystem before. Appending would leave the old
			// entry in front, which all URI lookups would keep returning.
			insertOrUpdate(Document(params.textDocument));
			return true;
		}
		else if (msg.method == "textDocument/didClose")
		{
			auto targetUri = msg.params["textDocument"]["uri"].str;
			if (!unloadDocument(targetUri))
			{
				warning("Received didClose notification for URI not in system: ", targetUri);
				warning("This can be a potential memory leak if it was previously opened under a different name.");
			}
			return true;
		}
		else if (msg.method == "textDocument/didChange")
		{
			auto targetUri = msg.params["textDocument"]["uri"].str;
			auto idx = documentStore.countUntil!(a => a.uri == targetUri);
			if (idx >= 0)
			{
				documentStore[idx].version_ = msg.params["textDocument"]["version"].integer;
				foreach (change; msg.params["contentChanges"].array)
				{
					// a change with a range is incremental, otherwise it
					// carries the whole new text
					if (auto rangePtr = "range" in change)
					{
						auto range = *rangePtr;
						TextRange textRange = cast(Position[2])[
							range["start"].fromJSON!Position, range["end"].fromJSON!Position
						];
						documentStore[idx].applyChange(textRange, change["text"].str);
					}
					else
						documentStore[idx].setContent(change["text"].str);
				}
			}
			return true;
		}
		return false;
	}
}

/// Helper structure for storing any data of type T on a per-file basis.
struct PerDocumentCache(T)
{
	/// A cached value together with the document it was stored for.
	struct Entry
	{
		Document document;
		T data;
	}

	/// All cached entries, at most one per document URI when only modified
	/// through $(LREF store).
	Entry[] entries;

	/// Returns the data stored for the given URI if it was stored for a
	/// document version which is at least the version `source` currently has
	/// for that URI, `T.init` otherwise.
	T cached(ref TextDocumentManager source, string uri)
	{
		auto newest = source.tryGet(uri);
		foreach (entry; entries)
			if (entry.document.uri == uri)
			{
				if (entry.document.version_ >= newest.version_)
					return entry.data;
				else
					return T.init;
			}
		return T.init;
	}

	/// Stores data for the URI of the given document. An existing entry is
	/// only overwritten if the given document version is not older than the
	/// stored one.
	void store(Document document, T data)
	{
		foreach (ref entry; entries)
		{
			if (entry.document.uri == document.uri)
			{
				if (document.version_ >= entry.document.version_)
				{
					entry.document = document;
					entry.data = data;
				}
				return;
			}
		}
		entries ~= Entry(document, data);
	}
}

/// Returns a range of the identifier/word at the given position.
///
/// Params:
///   line = the text of a single line
///   character = the cursor position inside the line, counted in UTF-16 code
///     units
/// Returns: `[start, end]` of the word in UTF-16 code units, `end` being
/// exclusive.
uint[2] wordInLine(const(char)[] line, uint character)
{
	return wordInLineImpl!(wchar, uint)(line, character);
}

/// ditto
size_t[2] wordInLineBytes(const(char)[] line, size_t bytes)
{
	return wordInLineImpl!(char, size_t)(line, bytes);
}

/// Implementation of $(LREF wordInLine) and $(LREF wordInLineBytes).
/// Params:
///   CharT = character type whose code units the offsets are counted in
///   SizeT = integer type of the offsets
///   line = the text of a single line
///   character = the cursor position inside the line in `CharT` code units
/// Returns: `[start, end]` of the word around the cursor, `end` being
/// exclusive. Both values are limited to `line.length`.
SizeT[2] wordInLineImpl(CharT, SizeT)(const(char)[] line, SizeT character)
{
	size_t index = 0;
	SizeT offs = 0;

	// offset right after the last separator seen so far. The line start as
	// long as there was none, so that the first word of a line starts at 0
	// and not at the cursor.
	SizeT lastStart = 0;
	SizeT start = character, end = character + 1;
	bool searchStart = true;
	bool foundEnd = false;

	while (index < line.length)
	{
		const c = decode(line, index);
		const l = cast(SizeT) c.codeLength!CharT;

		if (searchStart)
		{
			if (isDIdentifierSeparatingChar(c))
				lastStart = offs + l;

			if (offs + l >= character)
			{
				start = lastStart;
				searchStart = false;
			}

			offs += l;
		}
		else
		{
			end = offs;
			offs += l;
			if (isDIdentifierSeparatingChar(c))
			{
				foundEnd = true;
				break;
			}
		}
	}

	// the line ended inside of the word: it reaches up to the end of the
	// line, not just up to its last character
	if (!searchStart && !foundEnd)
		end = offs;

	if (start > line.length)
		start = cast(SizeT)line.length;
	if (end > line.length)
		end = cast(SizeT)line.length;
	if (end < start)
		end = start;

	return [start, end];
}

unittest
{
	size_t[2] first = [0, 3];
	size_t[2] second = [4, 5];
	size_t[2] atEnd = [6, 6];

	assert(wordInLineBytes("int i;", 0) == first);
	assert(wordInLineBytes("int i;", 1) == first);
	assert(wordInLineBytes("int i;", 3) == first);
	assert(wordInLineBytes("int i;", 4) == second);
	assert(wordInLineBytes("int i;", 5) == second);
	assert(wordInLineBytes("int i;", 6) == atEnd);
	// no separator terminating the last word
	assert(wordInLineBytes("int i", 4) == second);

	uint[2] world = [6, 11];
	assert(wordInLine("hellö world", 8) == world);
}

deprecated("use isDIdentifierSeparatingChar instead")
alias isIdentifierSeparatingChar = isDIdentifierSeparatingChar;

/// Returns `true` for the characters which end a word in
/// $(LREF wordInLine): the ASCII characters which are not digits, letters,
/// `^` or `_` as well as the unicode line and paragraph separators.
bool isDIdentifierSeparatingChar(dchar c)
{
	return c < 48 || (c > 57 && c < 65) || c == '[' || c == '\\' || c == ']'
		|| c == '`' || (c > 122 && c < 128) || c == '\u2028' || c == '\u2029'; // line separators
}

/// Returns `true` if the string is not empty, doesn't start with a digit and
/// contains no character for which $(LREF isDIdentifierSeparatingChar)
/// returns `true`.
bool isValidDIdentifier(const(char)[] s)
{
	import std.ascii : isDigit;

	return s.length && !s[0].isDigit && !s.any!isDIdentifierSeparatingChar;
}

unittest
{
	assert(!isValidDIdentifier(""));
	assert(!isValidDIdentifier("0"));
	assert(!isValidDIdentifier("10"));
	assert(!isValidDIdentifier("1a"));
	assert(isValidDIdentifier("_"));
	assert(isValidDIdentifier("a"));
	assert(isValidDIdentifier("__helloWorld123"));
}

unittest
{
	Document doc;
	doc.text.reserve(16);
	auto ptr = doc.text.ptr;
	assert(doc.rawText.length == 0);
	doc.setContent("Hello world");
	assert(doc.rawText == "Hello world");
	doc.setContent("foo");
	assert(doc.rawText == "foo");
	doc.setContent("foo bar baz baf");
	assert(doc.rawText == "foo bar baz baf");
	doc.applyChange(TextRange(0, 4, 0, 8), "");
	assert(doc.rawText == "foo baz baf");
	doc.applyChange(TextRange(0, 4, 0, 8), "bad");
	assert(doc.rawText == "foo badbaf");
	doc.applyChange(TextRange(0, 4, 0, 8), "bath");
	assert(doc.rawText == "foo bathaf");
	doc.applyChange(TextRange(0, 4, 0, 10), "bath");
	assert(doc.rawText == "foo bath");
	doc.applyChange(TextRange(0, 0, 0, 8), "bath");
	assert(doc.rawText == "bath");
	doc.applyChange(TextRange(0, 0, 0, 1), "par");
	assert(doc.rawText == "parath", doc.rawText);
	doc.applyChange(TextRange(0, 0, 0, 4), "");
	assert(doc.rawText == "th");
	doc.applyChange(TextRange(0, 2, 0, 2), "e");
	assert(doc.rawText == "the");
	doc.applyChange(TextRange(0, 0, 0, 0), "in");
	assert(doc.rawText == "inthe");
	// none of the edits above may have reallocated the reserved buffer
	assert(ptr is doc.text.ptr);
}

// Advances both indices over one UTF-8 encoded code point, of which only the
// lead byte `c` is inspected: the UTF-8 index by the sequence length announced
// by the lead byte, the UTF-16 index by the number of UTF-16 code units the
// code point takes up.
pragma(inline, true) private void utf16DecodeUtf8Length(A, B)(char c, ref A utf16Index,
		ref B utf8Index) @safe nothrow @nogc
{
	switch (c & 0b1111_0000)
	{
	case 0b1110_0000:
		// assume valid encoding (no wrong surrogates)
		utf16Index++;
		utf8Index += 3;
		break;
	case 0b1111_0000:
		// 4 byte sequences are outside of the BMP: surrogate pair
		utf16Index += 2;
		utf8Index += 4;
		break;
	case 0b1100_0000:
	case 0b1101_0000:
		utf16Index++;
		utf8Index += 2;
		break;
	default:
		utf16Index++;
		utf8Index++;
		break;
	}
}

/// Returns the number of UTF-16 code units needed to represent the given
/// UTF-8 text. Only lead bytes are counted, the text is not validated.
pragma(inline, true) size_t countUTF16Length(scope const(char)[] text) @safe nothrow @nogc
{
	size_t offset;
	size_t index;
	while (index < text.length)
	{
		const c = (() @trusted => text.ptr[index++])();
		// every byte except for continuation bytes (0x80 .. 0xBF) starts a
		// code point
		if (cast(byte)c >= -0x40) offset++;
		// lead byte of a 4 byte sequence: second half of the surrogate pair
		if (c >= 0xf0) offset++;
	}
	return offset;
}

/// Returns the byte index inside the given UTF-8 text at which the given
/// number of UTF-16 code units has been passed. The result is always placed
/// at the start of a code point or at the end of the text.
pragma(inline, true) size_t countBytesUntilUTF16Index(scope const(char)[] text, size_t utf16Offset) @safe nothrow @nogc
{
	size_t bytes;
	size_t offset;
	while (offset < utf16Offset && bytes < text.length)
	{
		char c = (() @trusted => text.ptr[bytes++])();
		if (cast(byte)c >= -0x40) offset++;
		if (c >= 0xf0) offset++;
	}
	// skip the continuation bytes of the code point which was read last
	while (bytes < text.length)
	{
		char c = (() @trusted => text.ptr[bytes])();
		if (cast(byte)c >= -0x40) break;
		bytes++;
	}
	return bytes;
}

version (unittest)
{
	import core.time;

	// NOTE(review): the whitespace inside of this literal had to be
	// reconstructed (tab indentation, single spaces). It matches all byte and
	// offset constants below under the assumption that the line
	// `//表中不存在 键 “purple” ` ends with one trailing space - confirm
	// against the original file.
	Document testUnicodeDocument = Document.nullDocumentOwnMemory(cast(char[]) `///
/// Copyright © 2020 Somebody (not actually™) x3
///
module some.file;

enum Food : int
{
	pizza = '\U0001F355', // 🍕
	burger = '\U0001F354', // 🍔
	chicken = '\U0001F357', // 🍗
	taco = '\U0001F32E', // 🌮
	wrap = '\U0001F32F', // 🌯
	salad = '\U0001F957', // 🥗
	pasta = '\U0001F35D', // 🍝
	sushi = '\U0001F363', // 🍣
	oden = '\U0001F362', // 🍢
	egg = '\U0001F373', // 🍳
	croissant = '\U0001F950', // 🥐
	baguette = '\U0001F956', // 🥖
	popcorn = '\U0001F37F', // 🍿
	coffee = '\u2615', // ☕
	cookie = '\U0001F36A', // 🍪
}

void main() {
	// taken from https://github.com/DlangRen/Programming-in-D/blob/master/ddili/src/ders/d.cn/aa.d
	int[string] colorCodes = [ /* ... */ ];

	if ("purple" in colorCodes) {
		// ü®™🍳键 “purple” 在表中

	} else { // line 31
		//表中不存在 键 “purple” 
	}

	string x;
}`);

	enum testSOF_byte = 0;
	enum testSOF_offset = 0;
	enum testSOF_position = Position(0, 0);

	enum testEOF_byte = 872;
	enum testEOF_offset = 805;
	enum testEOF_position = Position(36, 1);

	// in line before unicode
	enum testLinePreUni_byte = 757;
	enum testLinePreUni_offset = 724;
	enum testLinePreUni_position = Position(29, 4); // after `//`

	// in line after unicode
	enum testLinePostUni_byte = 789;
	enum testLinePostUni_offset = 742;
	enum testLinePostUni_position = Position(29, 22); // after `purple” 在`

	// ascii line after unicode line
	enum testMidAsciiLine_byte = 804;
	enum testMidAsciiLine_offset = 753;
	enum testMidAsciiLine_position = Position(31, 7);

	@("{offset, bytes, position} -> {offset, bytes, position}")
	unittest
	{
		import std.conv;
		import std.stdio;

		static foreach (test; [
				"SOF", "EOF", "LinePreUni", "LinePostUni", "MidAsciiLine"
			])
		{
			{
				enum testOffset = mixin("test" ~ test ~ "_offset");
				enum testByte = mixin("test" ~ test ~ "_byte");
				enum testPosition = mixin("test" ~ test ~ "_position");

				writeln(" === Test ", test, " ===");

				writeln(testByte, " byte -> offset ", testOffset);
				assert(testUnicodeDocument.bytesToOffset(testByte) == testOffset,
						"fail " ~ test ~ " byte->offset = " ~ testUnicodeDocument.bytesToOffset(testByte)
						.to!string);
				writeln(testByte, " byte -> position ", testPosition);
				assert(testUnicodeDocument.bytesToPosition(testByte) == testPosition,
						"fail " ~ test ~ " byte->position = " ~ testUnicodeDocument.bytesToPosition(testByte)
						.to!string);

				writeln(testOffset, " offset -> byte ", testByte);
				assert(testUnicodeDocument.offsetToBytes(testOffset) == testByte,
						"fail " ~ test ~ " offset->byte = " ~ testUnicodeDocument.offsetToBytes(testOffset)
						.to!string);
				writeln(testOffset, " offset -> position ", testPosition);
				assert(testUnicodeDocument.offsetToPosition(testOffset) == testPosition,
						"fail " ~ test ~ " offset->position = " ~ testUnicodeDocument.offsetToPosition(testOffset)
						.to!string);

				writeln(testPosition, " position -> offset ", testOffset);
				assert(testUnicodeDocument.positionToOffset(testPosition) == testOffset,
						"fail " ~ test ~ " position->offset = " ~ testUnicodeDocument.positionToOffset(testPosition)
						.to!string);
				writeln(testPosition, " position -> byte ", testByte);
				assert(testUnicodeDocument.positionToBytes(testPosition) == testByte,
						"fail " ~ test ~ " position->byte = " ~ testUnicodeDocument.positionToBytes(testPosition)
						.to!string);

				writeln();
			}
		}

		// out of range inputs must be limited to the end of the document
		const size_t maxBytes = testEOF_byte;
		const size_t maxOffset = testEOF_offset;
		const Position maxPosition = testEOF_position;

		writeln("max offset -> byte");
		assert(testUnicodeDocument.offsetToBytes(size_t.max) == maxBytes);
		writeln("max offset -> position");
		assert(testUnicodeDocument.offsetToPosition(size_t.max) == maxPosition);
		writeln("max byte -> offset");
		assert(testUnicodeDocument.bytesToOffset(size_t.max) == maxOffset);
		writeln("max byte -> position");
		assert(testUnicodeDocument.bytesToPosition(size_t.max) == maxPosition);
		writeln("max position -> offset");
		assert(testUnicodeDocument.positionToOffset(Position(uint.max, uint.max)) == maxOffset);
		writeln("max position -> byte");
		assert(testUnicodeDocument.positionToBytes(Position(uint.max, uint.max)) == maxBytes);
	}

	version (none)
	@("character transform benchmarks")
	unittest
	{
		import std.datetime.stopwatch;
		import std.random;
		import std.stdio;

		enum PositionCount = 32;
		size_t[PositionCount] testBytes;
		size_t[PositionCount] testOffsets;
		Position[PositionCount] testPositions;

		static immutable funs = [
			"offsetToBytes", "offsetToPosition", "bytesToOffset", "bytesToPosition",
			"positionToOffset", "positionToBytes"
		];

		size_t debugSum;

		size_t lengthUtf16 = testUnicodeDocument.text.codeLength!wchar;
		enum TestRepeats = 10;
		Duration[TestRepeats][funs.length] times;

		StopWatch sw;
		static foreach (iterations; [
				1e3, 1e4, /* 1e5 */
			])
		{
			writeln("==================");
			writeln("Timing ", iterations, "x", PositionCount, "x", TestRepeats, " iterations:");
			foreach (ref row; times)
				foreach (ref col; row)
					col = Duration.zero;

			static foreach (t; 0 .. TestRepeats)
			{
				foreach (i, ref v; testOffsets)
				{
					v = uniform(0, lengthUtf16);
					testBytes[i] = testUnicodeDocument.offsetToBytes(v);
					testPositions[i] = testUnicodeDocument.offsetToPosition(v);
				}
				static foreach (fi, fun; funs)
				{
					sw.reset();
					sw.start();
					foreach (i; 0 .. iterations)
					{
						foreach (v; 0 .. PositionCount)
						{
							// the first letter of the function name tells
							// which kind of input it takes
							static if (fun[0] == 'b')
								mixin("debugSum |= testUnicodeDocument." ~ fun ~ "(testBytes[v]).sumVal;");
							else static if (fun[0] == 'o')
								mixin("debugSum |= testUnicodeDocument." ~ fun ~ "(testOffsets[v]).sumVal;");
							else static if (fun[0] == 'p')
								mixin("debugSum |= testUnicodeDocument." ~ fun ~ "(testPositions[v]).sumVal;");
							else
								static assert(false);
						}
					}
					sw.stop();
					times[fi][t] = sw.peek;
				}
			}
			static foreach (fi, fun; funs)
			{
				writeln(fun, ": ", formatDurationDistribution(times[fi]));
			}
			writeln();
			writeln();
		}

		writeln("tricking the optimizer", debugSum);
	}

	// Folds a benchmark result into a single integer.
	private pragma(inline, true) size_t sumVal(size_t v) pure @safe nothrow @nogc
	{
		return v;
	}

	// ditto, reinterprets the memory of the position as integer.
	private pragma(inline, true) size_t sumVal(Position v) pure @trusted nothrow @nogc
	{
		return cast(size_t)*(cast(ulong*)&v);
	}

	// Formats average, median and (population) standard deviation of the
	// given durations in milliseconds.
	private string formatDurationDistribution(size_t n)(Duration[n] durs)
	{
		import std.algorithm : fold, map, sort, sum;
		import std.format : format;
		import std.math : sqrt;

		Duration total = durs[].fold!"a+b";
		sort!"a<b"(durs[]);
		double msAvg = cast(double) total.total!"hnsecs" / 10_000.0 / n;
		double msMedian = cast(double) durs[$ / 2].total!"hnsecs" / 10_000.0;
		double[n] diffs = 0;
		foreach (i, dur; durs)
			diffs[i] = (cast(double) dur.total!"hnsecs" / 10_000.0) - msAvg;
		// square root of the mean (not the sum) of the squared deviations
		double msStdDeviation = (diffs[].map!"a*a".sum / n).sqrt;
		return format!"[avg=%.4fms, median=%.4f, sd=%.4f]"(msAvg, msMedian, msStdDeviation);
	}
}