/**
  Mirror _unicodeobject.h

  Unicode API names are mangled to assure that UCS-2 and UCS-4 builds
  produce different external names and thus cause import errors in
  case Python interpreters and extensions with mixed compiled in
  Unicode width assumptions are combined.
  */
module deimos.python.unicodeobject;

import core.stdc.stdarg;
import core.stdc.string;
import deimos.python.pyport;
import deimos.python.object;

extern(C):
// Python-header-file: Include/unicodeobject.h:

/** Py_UNICODE is the native Unicode storage format (code unit) used by
   Python and represents a single Unicode element in the Unicode
   type. */
version (Python_Unicode_UCS2) {
    version (Windows) {
        alias wchar Py_UNICODE;
    } else {
        alias ushort Py_UNICODE;
    }
} else {
    alias uint Py_UNICODE;
}
/** A full Unicode code point. Always 32 bits wide, independent of the
   width chosen for Py_UNICODE (CPython declares it as a 32-bit unsigned
   integer on narrow builds as well). */
alias uint Py_UCS4;

/**
  subclass of PyObject.
  */
struct PyUnicodeObject {
    mixin PyObject_HEAD;
    /** Length of raw Unicode data in buffer */
    Py_ssize_t length;
    /** Raw Unicode buffer */
    Py_UNICODE* str;
    /** Hash value; -1 if not set */
    C_long hash;
    /** (Default) Encoded version as Python
       string, or NULL; this is used for
       implementing the buffer protocol */
    PyObject* defenc;
}

/// _
mixin(PyAPI_DATA!"PyTypeObject PyUnicode_Type");

// D translations of C macros:
/** Fast access macros */
int PyUnicode_Check()(PyObject* op) {
    return PyObject_TypeCheck(op, &PyUnicode_Type);
}
/// ditto
int PyUnicode_CheckExact()(PyObject* op) {
    return Py_TYPE(op) == &PyUnicode_Type;
}

/// ditto
size_t PyUnicode_GET_SIZE()(PyUnicodeObject* op) {
    return op.length;
}
/// ditto
size_t PyUnicode_GET_DATA_SIZE()(PyUnicodeObject* op) {
    return op.length * Py_UNICODE.sizeof;
}
/// ditto
Py_UNICODE* PyUnicode_AS_UNICODE()(PyUnicodeObject* op) {
    return op.str;
}
/// ditto
const(char)* PyUnicode_AS_DATA()(PyUnicodeObject* op) {
    return cast(const(char)*) op.str;
}

/** This Unicode character will be used as replacement character during
   decoding if the errors argument is set to "replace". Note: the
   Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
   Unicode 3.0. */
enum Py_UNICODE Py_UNICODE_REPLACEMENT_CHARACTER = 0xFFFD;

// Prefix under which the C library exports the Unicode API for this build.
version(Python_3_3_Or_Later) {
    enum PyUnicode_ = "PyUnicode_";
}else version(Python_Unicode_UCS2) {
    enum PyUnicode_ = "PyUnicodeUCS2_";
}else{
    enum PyUnicode_ = "PyUnicodeUCS4_";
}

/**
  Rewrites a piece of declaration source so that every function named
  PyUnicode_XX or _PyUnicode_XX is declared under its mangled name (the
  "PyUnicode_" part is replaced by the PyUnicode_ prefix selected above)
  and is followed by an alias that maps the unmangled name back onto it:

      T PyUnicodeUCS4_XX(args);
       /// ditto
      alias PyUnicodeUCS4_XX PyUnicode_XX;

  Block comments are copied through verbatim, so API names that are only
  mentioned in documentation are not rewritten. All other text, including
  whatever follows the last declaration, is copied through unchanged.

  Params:
  code = source text containing the declarations
  Returns: the rewritten source text
*/
string substitute_and_alias()(string code) {
    import std.algorithm;
    import std.array;
    string[] newcodes;
LOOP:
    while(true) {
        if(startsWith(code,"/*")) {
            // countUntil returns a signed offset relative to code[2 .. $].
            ptrdiff_t comm_end_index = countUntil(code[2 .. $], "*/");
            if(comm_end_index < 0) break;
            // +2 for the opening delimiter that was sliced off above and
            // +2 for the closing delimiter: step over the whole comment.
            // Without this an empty comment would never advance.
            size_t comm_len = comm_end_index + 4;
            newcodes ~= code[0 .. comm_len];
            code = code[comm_len .. $];
            continue;
        }
        if(!(startsWith(code,"PyUnicode_") || startsWith(code,"_PyUnicode_"))) {
            // Copy plain text up to the next API name or block comment.
            size_t index = 0;
            while(index < code.length) {
                if(code[index] == '_') {
                    if(startsWith(code[index .. $], "_PyUnicode_")) {
                        break;
                    }
                }else if(code[index] == 'P') {
                    if(startsWith(code[index .. $], "PyUnicode_")) {
                        break;
                    }
                }else if(code[index] == '/') {
                    if(startsWith(code[index .. $], "/*")) {
                        break;
                    }
                }
                index++;
            }
            if(index == code.length) break;
            newcodes ~= code[0 .. index];
            code = code[index .. $];
            continue;
        }
        // code now starts with the function name; it runs up to the '('.
        ptrdiff_t end_index = countUntil(code, "(");
        if(end_index < 0) break;
        string alias_name = code[0 .. end_index];
        string func_name = replace(alias_name, "PyUnicode_", PyUnicode_);
        // Find the ')' matching the opening '(' of the parameter list.
        size_t index0 = end_index+1;
        int parencount = 1;
        while(parencount && index0 < code.length) {
            if(startsWith(code[index0 .. $], "/*")) {
                ptrdiff_t comm_end_index = countUntil(code[index0+2 .. $], "*/");
                if(comm_end_index < 0) break LOOP;
                // Skip the entire comment so that parentheses written
                // inside it cannot disturb the nesting count.
                index0 += comm_end_index + 4;
                continue;
            }else if(code[index0] == '(') {
                parencount++;
                index0++;
            }else if(code[index0] == ')') {
                parencount--;
                index0++;
            }else{
                index0++;
            }
        }
        ptrdiff_t semi = countUntil(code[index0 .. $], ";");
        if(semi < 0) break;
        index0 += semi+1;

        string alias_line = "\nalias " ~ func_name ~ " " ~ alias_name ~ ";\n";
        newcodes ~= func_name;
        newcodes ~= code[end_index .. index0];
        newcodes ~= "\n /// ditto \n";
        newcodes ~= alias_line;

        code = code[index0 .. $];
    }
    // Whatever was not consumed (trailing text such as a closing brace, or
    // input the scanner could not parse) is passed through, not dropped.
    newcodes ~= code;

    string newcode;
    foreach(c; newcodes) {
        newcode ~= c;
    }
    return newcode;
}

enum string unicode_funs = q{
    version(Python_2_6_Or_Later) {

    /** Create a Unicode Object from the Py_UNICODE buffer u of the given
       size.

       u may be NULL which causes the contents to be undefined. It is the
       user's responsibility to fill in the needed data afterwards. Note
       that modifying the Unicode object contents after construction is
       only allowed if u was set to NULL.

       The buffer is copied into the new object.
*/ 190 /// Availability: >= 2.6 191 PyObject* PyUnicode_FromUnicode(Py_UNICODE* u, Py_ssize_t size); 192 193 /** Similar to PyUnicode_FromUnicode(), but u points to Latin-1 encoded bytes */ 194 /// Availability: >= 2.6 195 PyObject* PyUnicode_FromStringAndSize( 196 const(char)*u, /* char buffer */ 197 Py_ssize_t size /* size of buffer */ 198 ); 199 200 /** Similar to PyUnicode_FromUnicode(), but u points to null-terminated 201 Latin-1 encoded bytes */ 202 /// Availability: >= 2.6 203 PyObject* PyUnicode_FromString( 204 const(char)*u /* string */ 205 ); 206 /// Availability: >= 2.6 207 PyObject* PyUnicode_FromFormatV(const(char)*, va_list); 208 /// Availability: >= 2.6 209 PyObject* PyUnicode_FromFormat(const(char)*, ...); 210 211 /** Format the object based on the format_spec, as defined in PEP 3101 212 (Advanced String Formatting). */ 213 /// Availability: >= 2.6 214 PyObject* _PyUnicode_FormatAdvanced(PyObject *obj, 215 Py_UNICODE *format_spec, 216 Py_ssize_t format_spec_len); 217 /// Availability: >= 2.6 218 int PyUnicode_ClearFreeList(); 219 /** 220 Params: 221 string = UTF-7 encoded string 222 length = size of string 223 errors = error handling 224 consumed = bytes consumed 225 */ 226 /// Availability: >= 2.6 227 PyObject* PyUnicode_DecodeUTF7Stateful( 228 const(char)* string, 229 Py_ssize_t length, 230 const(char)*errors, 231 Py_ssize_t *consumed 232 ); 233 /** 234 Params: 235 string = UTF-32 encoded string 236 length = size of string 237 errors = error handling 238 byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit 239 */ 240 /// Availability: >= 2.6 241 PyObject* PyUnicode_DecodeUTF32( 242 const(char)* string, 243 Py_ssize_t length, 244 const(char)*errors, 245 int *byteorder 246 ); 247 248 /** 249 Params: 250 string = UTF-32 encoded string 251 length = size of string 252 errors = error handling 253 byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit 254 */ 255 /// Availability: >= 2.6 256 PyObject* 
PyUnicode_DecodeUTF32Stateful( 257 const(char)*string, 258 Py_ssize_t length, 259 const(char)*errors, 260 int *byteorder, 261 Py_ssize_t *consumed 262 ); 263 /** Returns a Python string using the UTF-32 encoding in native byte 264 order. The string always starts with a BOM mark. */ 265 /// Availability: >= 2.6 266 267 PyObject* PyUnicode_AsUTF32String( 268 PyObject *unicode 269 ); 270 271 /** Returns a Python string object holding the UTF-32 encoded value of 272 the Unicode data. 273 274 If byteorder is not 0, output is written according to the following 275 byte order: 276 277 byteorder == -1: little endian 278 byteorder == 0: native byte order (writes a BOM mark) 279 byteorder == 1: big endian 280 281 If byteorder is 0, the output string will always start with the 282 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is 283 prepended. 284 Params: 285 data = Unicode char buffer 286 length = number of Py_UNICODE chars to encode 287 errors = error handling 288 byteorder = byteorder to use 0=BOM+native;-1=LE,1=BE 289 290 */ 291 /// Availability: >= 2.6 292 PyObject* PyUnicode_EncodeUTF32( 293 const Py_UNICODE *data, 294 Py_ssize_t length, 295 const(char)* errors, 296 int byteorder 297 ); 298 } 299 300 /** Return a read-only pointer to the Unicode object's internal 301 Py_UNICODE buffer. */ 302 Py_UNICODE* PyUnicode_AsUnicode(PyObject* unicode); 303 /** Get the length of the Unicode object. */ 304 Py_ssize_t PyUnicode_GetSize(PyObject* unicode); 305 306 /** Get the maximum ordinal for a Unicode character. */ 307 Py_UNICODE PyUnicode_GetMax(); 308 309 /** Resize an already allocated Unicode object to the new size length. 310 311 _*unicode is modified to point to the new (resized) object and 0 312 returned on success. 313 314 This API may only be called by the function which also called the 315 Unicode constructor. The refcount on the object must be 1. Otherwise, 316 an error is returned. 
317 318 Error handling is implemented as follows: an exception is set, -1 319 is returned and *unicode left untouched. 320 Params: 321 unicode = pointer to the new unicode object. 322 length = New length. 323 324 */ 325 int PyUnicode_Resize(PyObject** unicode, Py_ssize_t length); 326 /** Coerce obj to a Unicode object and return a reference with 327 _*incremented* refcount. 328 329 Coercion is done in the following way: 330 331 1. String and other char buffer compatible objects are decoded 332 under the assumptions that they contain data using the current 333 default encoding. Decoding is done in "strict" mode. 334 335 2. All other objects (including Unicode objects) raise an 336 exception. 337 338 The API returns NULL in case of an error. The caller is responsible 339 for decref'ing the returned objects. 340 341 */ 342 PyObject* PyUnicode_FromEncodedObject( 343 PyObject* obj, 344 const(char)* encoding, 345 const(char)* errors); 346 347 /** Coerce obj to a Unicode object and return a reference with 348 _*incremented* refcount. 349 350 Unicode objects are passed back as-is (subclasses are converted to 351 true Unicode objects), all other objects are delegated to 352 PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in 353 using the default encoding as basis for decoding the object. 354 355 The API returns NULL in case of an error. The caller is responsible 356 for decref'ing the returned objects. 357 358 */ 359 PyObject* PyUnicode_FromObject(PyObject* obj); 360 361 /** Create a Unicode Object from the wchar_t buffer w of the given 362 size. 363 364 The buffer is copied into the new object. */ 365 PyObject* PyUnicode_FromWideChar(const(wchar)* w, Py_ssize_t size); 366 367 /** Copies the Unicode Object contents into the wchar_t buffer w. At 368 most size wchar_t characters are copied. 369 370 Note that the resulting wchar_t string may or may not be 371 0-terminated. 
It is the responsibility of the caller to make sure 372 that the wchar_t string is 0-terminated in case this is required by 373 the application. 374 375 Returns the number of wchar_t characters copied (excluding a 376 possibly trailing 0-termination character) or -1 in case of an 377 error. */ 378 Py_ssize_t PyUnicode_AsWideChar( 379 PyUnicodeObject* unicode, 380 const(wchar)* w, 381 Py_ssize_t size); 382 383 /** Create a Unicode Object from the given Unicode code point ordinal. 384 385 The ordinal must be in range(0x10000) on narrow Python builds 386 (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is 387 raised in case it is not. 388 389 */ 390 PyObject* PyUnicode_FromOrdinal(int ordinal); 391 392 /** Return a Python string holding the default encoded value of the 393 Unicode object. 394 395 The resulting string is cached in the Unicode object for subsequent 396 usage by this function. The cached version is needed to implement 397 the character buffer interface and will live (at least) as long as 398 the Unicode object itself. 399 400 The refcount of the string is *not* incremented. 401 402 _*** Exported for internal use by the interpreter only !!! *** 403 404 */ 405 PyObject* _PyUnicode_AsDefaultEncodedString(PyObject *, const(char)*); 406 407 /** Returns the currently active default encoding. 408 409 The default encoding is currently implemented as run-time settable 410 process global. This may change in future versions of the 411 interpreter to become a parameter which is managed on a per-thread 412 basis. 413 414 */ 415 const(char)* PyUnicode_GetDefaultEncoding(); 416 417 /** Sets the currently active default encoding. 418 419 Returns 0 on success, -1 in case of an error. 420 421 */ 422 int PyUnicode_SetDefaultEncoding(const(char)*encoding); 423 424 /** Create a Unicode object by decoding the encoded string s of the 425 given size. 
426 Params: 427 s = encoded string 428 size = size of buffer 429 encoding = encoding 430 errors = error handling 431 */ 432 PyObject* PyUnicode_Decode( 433 const(char)* s, 434 Py_ssize_t size, 435 const(char)* encoding, 436 const(char)* errors); 437 438 version(Python_3_0_Or_Later) { 439 /** Decode a Unicode object unicode and return the result as Python 440 object. */ 441 /// Availability: 3.* 442 443 PyObject* PyUnicode_AsDecodedObject( 444 PyObject* unicode, 445 const(char)* encoding, 446 const(char)* errors 447 ); 448 /** Decode a Unicode object unicode and return the result as Unicode 449 object. */ 450 /// Availability: 3.* 451 452 PyObject* PyUnicode_AsDecodedUnicode( 453 PyObject* unicode, 454 const(char)* encoding, 455 const(char)* errors 456 ); 457 } 458 459 /** Encodes a Py_UNICODE buffer of the given size and returns a 460 Python string object. 461 Params: 462 s = Unicode char buffer 463 size = number of Py_UNICODE chars to encode 464 encoding = encoding 465 errors = error handling 466 */ 467 PyObject* PyUnicode_Encode( 468 Py_UNICODE* s, 469 Py_ssize_t size, 470 const(char)* encoding, 471 const(char)* errors); 472 473 /** Encodes a Unicode object and returns the result as Python object. 474 */ 475 PyObject* PyUnicode_AsEncodedObject( 476 PyObject* unicode, 477 const(char)* encoding, 478 const(char)* errors); 479 480 /** Encodes a Unicode object and returns the result as Python string 481 object. */ 482 PyObject* PyUnicode_AsEncodedString( 483 PyObject* unicode, 484 const(char)* encoding, 485 const(char)* errors); 486 487 version(Python_3_0_Or_Later) { 488 /** Encodes a Unicode object and returns the result as Unicode 489 object. 
*/ 490 /// Availability: >= 3.* 491 PyObject* PyUnicode_AsEncodedUnicode( 492 PyObject* unicode, 493 const(char)* encoding, 494 const(char)* errors 495 ); 496 } 497 498 /** 499 Params: 500 string = UTF-7 encoded string 501 length = size of string 502 errors = error handling 503 */ 504 PyObject* PyUnicode_DecodeUTF7( 505 const(char)* string, 506 Py_ssize_t length, 507 const(char)* errors); 508 509 /** 510 Params: 511 data = Unicode char buffer 512 length = number of Py_UNICODE chars to encode 513 base64SetO = Encode RFC2152 Set O characters in base64 514 base64WhiteSpace = Encode whitespace (sp, ht, nl, cr) in base64 515 errors = error handling 516 */ 517 PyObject* PyUnicode_EncodeUTF7( 518 Py_UNICODE* data, 519 Py_ssize_t length, 520 int encodeSetO, 521 int encodeWhiteSpace, 522 const(char)* errors 523 ); 524 525 /// _ 526 PyObject* PyUnicode_DecodeUTF8( 527 const(char)* string, 528 Py_ssize_t length, 529 const(char)* errors); 530 /// _ 531 PyObject* PyUnicode_DecodeUTF8Stateful( 532 const(char)* string, 533 Py_ssize_t length, 534 const(char)* errors, 535 Py_ssize_t* consumed 536 ); 537 /// _ 538 PyObject* PyUnicode_AsUTF8String(PyObject* unicode); 539 /// _ 540 PyObject* PyUnicode_EncodeUTF8( 541 Py_UNICODE* data, 542 Py_ssize_t length, 543 const(char) *errors); 544 545 /** Decodes length bytes from a UTF-16 encoded buffer string and returns 546 the corresponding Unicode object. 547 548 errors (if non-NULL) defines the error handling. It defaults 549 to "strict". 550 551 If byteorder is non-NULL, the decoder starts decoding using the 552 given byte order: 553 554 *byteorder == -1: little endian 555 *byteorder == 0: native order 556 *byteorder == 1: big endian 557 558 In native mode, the first two bytes of the stream are checked for a 559 BOM mark. If found, the BOM mark is analysed, the byte order 560 adjusted and the BOM skipped. In the other modes, no BOM mark 561 interpretation is done. 
After completion, *byteorder is set to the 562 current byte order at the end of input data. 563 564 If byteorder is NULL, the codec starts in native order mode. 565 566 */ 567 PyObject* PyUnicode_DecodeUTF16( 568 const(char)* string, 569 Py_ssize_t length, 570 const(char)* errors, 571 int* byteorder); 572 /** 573 Params: 574 string = UTF-16 encoded string 575 length = size of string 576 errors = error handling 577 byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit 578 consumed = bytes consumed 579 */ 580 PyObject* PyUnicode_DecodeUTF16Stateful( 581 const(char)* string, 582 Py_ssize_t length, 583 const(char)* errors, 584 int* byteorder, 585 Py_ssize_t* consumed 586 ); 587 /** Returns a Python string using the UTF-16 encoding in native byte 588 order. The string always starts with a BOM mark. */ 589 PyObject* PyUnicode_AsUTF16String(PyObject *unicode); 590 /** Returns a Python string object holding the UTF-16 encoded value of 591 the Unicode data. 592 593 If byteorder is not 0, output is written according to the following 594 byte order: 595 596 byteorder == -1: little endian 597 byteorder == 0: native byte order (writes a BOM mark) 598 byteorder == 1: big endian 599 600 If byteorder is 0, the output string will always start with the 601 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is 602 prepended. 603 604 Note that Py_UNICODE data is being interpreted as UTF-16 reduced to 605 UCS-2. This trick makes it possible to add full UTF-16 capabilities 606 at a later point without compromising the APIs. 
607 608 */ 609 PyObject* PyUnicode_EncodeUTF16( 610 Py_UNICODE* data, 611 Py_ssize_t length, 612 const(char)* errors, 613 int byteorder 614 ); 615 616 /// _ 617 PyObject* PyUnicode_DecodeUnicodeEscape( 618 const(char)* string, 619 Py_ssize_t length, 620 const(char)* errors); 621 /// _ 622 PyObject* PyUnicode_AsUnicodeEscapeString( 623 PyObject* unicode); 624 /// _ 625 PyObject* PyUnicode_EncodeUnicodeEscape( 626 Py_UNICODE* data, 627 Py_ssize_t length); 628 /** 629 Params: 630 string = Raw-Unicode-Escape encoded string 631 length = size of string 632 errors = error handling 633 */ 634 PyObject* PyUnicode_DecodeRawUnicodeEscape( 635 const(char)* string, 636 Py_ssize_t length, 637 const(char)* errors); 638 /// _ 639 PyObject* PyUnicode_AsRawUnicodeEscapeString(PyObject* unicode); 640 /// _ 641 PyObject* PyUnicode_EncodeRawUnicodeEscape( 642 Py_UNICODE* data, Py_ssize_t length); 643 644 /// _ 645 PyObject* _PyUnicode_DecodeUnicodeInternal( 646 const(char)* string, 647 Py_ssize_t length, 648 const(char)* errors); 649 650 /** 651 Params: 652 string = Latin-1 encoded string 653 length = size of string 654 errors = error handling 655 */ 656 PyObject* PyUnicode_DecodeLatin1( 657 const(char)* string, 658 Py_ssize_t length, 659 const(char)* errors); 660 /// _ 661 PyObject* PyUnicode_AsLatin1String(PyObject *unicode); 662 /** 663 Params: 664 data = Unicode char buffer 665 length = Number of Py_UNICODE chars to encode 666 errors = error handling 667 */ 668 PyObject* PyUnicode_EncodeLatin1( 669 Py_UNICODE* data, 670 Py_ssize_t length, 671 const(char)* errors); 672 673 /** 674 Params: 675 string = ASCII encoded string 676 length = size of string 677 errors = error handling 678 */ 679 PyObject* PyUnicode_DecodeASCII( 680 const(char)* string, 681 Py_ssize_t length, 682 const(char)* errors); 683 /// _ 684 PyObject* PyUnicode_AsASCIIString(PyObject *unicode); 685 /** 686 Params: 687 data = Unicode char buffer 688 length = Number of Py_UNICODE chars to encode 689 
errors = error handling 690 */ 691 PyObject* PyUnicode_EncodeASCII( 692 Py_UNICODE* data, 693 Py_ssize_t length, 694 const(char)* errors); 695 696 /** 697 Params: 698 string = Encoded string 699 length = size of string 700 mapping = character mapping (char ordinal -> unicode ordinal) 701 errors = error handling 702 */ 703 PyObject* PyUnicode_DecodeCharmap( 704 const(char)* string, 705 Py_ssize_t length, 706 PyObject* mapping, 707 const(char)* errors 708 ); 709 /** 710 Params: 711 unicode = Unicode object 712 mapping = character mapping (unicode ordinal -> char ordinal) 713 */ 714 PyObject* PyUnicode_AsCharmapString( 715 PyObject* unicode, 716 PyObject* mapping); 717 /** 718 Params: 719 data = Unicode char buffer 720 length = Number of Py_UNICODE chars to encode 721 mapping = character mapping (unicode ordinal -> char ordinal) 722 errors = error handling 723 */ 724 PyObject* PyUnicode_EncodeCharmap( 725 Py_UNICODE* data, 726 Py_ssize_t length, 727 PyObject* mapping, 728 const(char)* errors 729 ); 730 /** Translate a Py_UNICODE buffer of the given length by applying a 731 character mapping table to it and return the resulting Unicode 732 object. 733 734 The mapping table must map Unicode ordinal integers to Unicode 735 ordinal integers or None (causing deletion of the character). 736 737 Mapping tables may be dictionaries or sequences. Unmapped character 738 ordinals (ones which cause a LookupError) are left untouched and 739 are copied as-is. 
740 741 */ 742 PyObject* PyUnicode_TranslateCharmap( 743 Py_UNICODE* data, 744 Py_ssize_t length, 745 PyObject* table, 746 const(char)* errors 747 ); 748 749 version (Windows) { 750 /// Availability: Windows only 751 PyObject* PyUnicode_DecodeMBCS( 752 const(char)* string, 753 Py_ssize_t length, 754 const(char)* errors); 755 /// Availability: Windows only 756 PyObject* PyUnicode_AsMBCSString(PyObject* unicode); 757 /// Availability: Windows only 758 PyObject* PyUnicode_EncodeMBCS( 759 Py_UNICODE* data, 760 Py_ssize_t length, 761 const(char)* errors); 762 } 763 /** Takes a Unicode string holding a decimal value and writes it into 764 an output buffer using standard ASCII digit codes. 765 766 The output buffer has to provide at least length+1 bytes of storage 767 area. The output string is 0-terminated. 768 769 The encoder converts whitespace to ' ', decimal characters to their 770 corresponding ASCII digit and all other Latin-1 characters except 771 \0 as-is. Characters outside this range (Unicode ordinals 1-256) 772 are treated as errors. This includes embedded NULL bytes. 773 774 Error handling is defined by the errors argument: 775 776 NULL or "strict": raise a ValueError 777 "ignore": ignore the wrong characters (these are not copied to the 778 output buffer) 779 "replace": replaces illegal characters with '?' 780 781 Returns 0 on success, -1 on failure. 782 783 */ 784 int PyUnicode_EncodeDecimal( 785 Py_UNICODE* s, 786 Py_ssize_t length, 787 char* output, 788 const(char)* errors); 789 790 /** Concat two strings giving a new Unicode string. 
*/ 791 PyObject* PyUnicode_Concat( 792 PyObject* left, 793 PyObject* right); 794 795 version(Python_3_0_Or_Later) { 796 /** Concat two strings and put the result in *pleft 797 (sets *pleft to NULL on error) 798 Params: 799 pleft = Pointer to left string 800 right = Right string 801 */ 802 /// Availability: 3.* 803 804 void PyUnicode_Append( 805 PyObject** pleft, 806 PyObject* right 807 ); 808 809 /** Concat two strings, put the result in *pleft and drop the right object 810 (sets *pleft to NULL on error) 811 Params: 812 pleft = Pointer to left string 813 */ 814 /// Availability: 3.* 815 void PyUnicode_AppendAndDel( 816 PyObject** pleft, 817 PyObject* right 818 ); 819 } 820 821 /** Split a string giving a list of Unicode strings. 822 823 If sep is NULL, splitting will be done at all whitespace 824 substrings. Otherwise, splits occur at the given separator. 825 826 At most maxsplit splits will be done. If negative, no limit is set. 827 828 Separators are not included in the resulting list. 829 830 */ 831 PyObject* PyUnicode_Split( 832 PyObject* s, 833 PyObject* sep, 834 Py_ssize_t maxsplit); 835 836 /** Ditto PyUnicode_Split, but split at line breaks. 837 838 CRLF is considered to be one line break. Line breaks are not 839 included in the resulting list. */ 840 PyObject* PyUnicode_Splitlines( 841 PyObject* s, 842 int keepends); 843 844 version(Python_2_5_Or_Later) { 845 /** Partition a string using a given separator. */ 846 /// Availability: >= 2.5 847 PyObject* PyUnicode_Partition( 848 PyObject* s, 849 PyObject* sep 850 ); 851 852 /** Partition a string using a given separator, searching from the end 853 of the string. */ 854 855 PyObject* PyUnicode_RPartition( 856 PyObject* s, 857 PyObject* sep 858 ); 859 } 860 861 /** Split a string giving a list of Unicode strings. 862 863 If sep is NULL, splitting will be done at all whitespace 864 substrings. Otherwise, splits occur at the given separator. 865 866 At most maxsplit splits will be done. 
But unlike PyUnicode_Split 867 PyUnicode_RSplit splits from the end of the string. If negative, 868 no limit is set. 869 870 Separators are not included in the resulting list. 871 872 */ 873 PyObject* PyUnicode_RSplit( 874 PyObject* s, 875 PyObject* sep, 876 Py_ssize_t maxsplit); 877 878 /** Translate a string by applying a character mapping table to it and 879 return the resulting Unicode object. 880 881 The mapping table must map Unicode ordinal integers to Unicode 882 ordinal integers or None (causing deletion of the character). 883 884 Mapping tables may be dictionaries or sequences. Unmapped character 885 ordinals (ones which cause a LookupError) are left untouched and 886 are copied as-is. 887 888 */ 889 PyObject* PyUnicode_Translate( 890 PyObject* str, 891 PyObject* table, 892 const(char)* errors); 893 894 /** Join a sequence of strings using the given separator and return 895 the resulting Unicode string. */ 896 PyObject* PyUnicode_Join( 897 PyObject* separator, 898 PyObject* seq); 899 900 /** Return 1 if substr matches str[start:end] at the given tail end, 0 901 otherwise. */ 902 Py_ssize_t PyUnicode_Tailmatch( 903 PyObject* str, 904 PyObject* substr, 905 Py_ssize_t start, 906 Py_ssize_t end, 907 int direction 908 ); 909 910 /** Return the first position of substr in str[start:end] using the 911 given search direction or -1 if not found. -2 is returned in case 912 an error occurred and an exception is set. */ 913 Py_ssize_t PyUnicode_Find( 914 PyObject* str, 915 PyObject* substr, 916 Py_ssize_t start, 917 Py_ssize_t end, 918 int direction 919 ); 920 921 /** Count the number of occurrences of substr in str[start:end]. */ 922 Py_ssize_t PyUnicode_Count( 923 PyObject* str, 924 PyObject* substr, 925 Py_ssize_t start, 926 Py_ssize_t end); 927 928 /** Replace at most maxcount occurrences of substr in str with replstr 929 and return the resulting Unicode object. 
*/ 930 PyObject* PyUnicode_Replace( 931 PyObject* str, 932 PyObject* substr, 933 PyObject* replstr, 934 Py_ssize_t maxcount 935 ); 936 937 /** Compare two strings and return -1, 0, 1 for less than, equal, 938 greater than resp. */ 939 int PyUnicode_Compare(PyObject* left, PyObject* right); 940 version(Python_3_0_Or_Later) { 941 /** Compare two strings and return -1, 0, 1 for less than, equal, 942 greater than resp. 943 Params: 944 left = Unicode object 945 right = ASCII-encoded string 946 */ 947 /// Availability: 3.* 948 int PyUnicode_CompareWithASCIIString( 949 PyObject* left, 950 const(char)* right 951 ); 952 } 953 954 version(Python_2_5_Or_Later) { 955 /** Rich compare two strings and return one of the following: 956 957 - NULL in case an exception was raised 958 - Py_True or Py_False for successful comparisons 959 - Py_NotImplemented in case the type combination is unknown 960 961 Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in 962 case the conversion of the arguments to Unicode fails with a 963 UnicodeDecodeError. 964 965 Possible values for op: 966 967 Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE 968 969 */ 970 /// Availability: >= 2.5 971 PyObject* PyUnicode_RichCompare( 972 PyObject* left, 973 PyObject* right, 974 int op 975 ); 976 } 977 978 /** Apply an argument tuple or dictionary to a format string and return 979 the resulting Unicode string. */ 980 PyObject* PyUnicode_Format(PyObject* format, PyObject* args); 981 982 /** Checks whether element is contained in container and return 1/0 983 accordingly. 984 985 element has to coerce to a one-element Unicode string. -1 is 986 returned in case of an error. */ 987 int PyUnicode_Contains(PyObject* container, PyObject* element); 988 989 version(Python_3_0_Or_Later) { 990 /** Checks whether argument is a valid identifier. 
*/ 991 /// Availability: 3.* 992 int PyUnicode_IsIdentifier(PyObject* s); 993 } 994 995 996 /// _ 997 int _PyUnicode_IsLowercase(Py_UNICODE ch); 998 /// _ 999 int _PyUnicode_IsUppercase(Py_UNICODE ch); 1000 /// _ 1001 int _PyUnicode_IsTitlecase(Py_UNICODE ch); 1002 /// _ 1003 int _PyUnicode_IsWhitespace(Py_UNICODE ch); 1004 /// _ 1005 int _PyUnicode_IsLinebreak(Py_UNICODE ch); 1006 /// _ 1007 Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch); 1008 /// _ 1009 Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch); 1010 /// _ 1011 Py_UNICODE _PyUnicode_ToTitlecase(Py_UNICODE ch); 1012 /// _ 1013 int _PyUnicode_ToDecimalDigit(Py_UNICODE ch); 1014 /// _ 1015 int _PyUnicode_ToDigit(Py_UNICODE ch); 1016 /// _ 1017 double _PyUnicode_ToNumeric(Py_UNICODE ch); 1018 /// _ 1019 int _PyUnicode_IsDecimalDigit(Py_UNICODE ch); 1020 /// _ 1021 int _PyUnicode_IsDigit(Py_UNICODE ch); 1022 /// _ 1023 int _PyUnicode_IsNumeric(Py_UNICODE ch); 1024 /// _ 1025 int _PyUnicode_IsAlpha(Py_UNICODE ch); 1026 1027 }; 1028 1029 /* 1030 pragma(msg,substitute_and_alias(unicode_funs)); 1031 mixin(substitute_and_alias(unicode_funs)); 1032 */ 1033 1034 // waaaa! calling substitute_and_alias breaks linking! 1035 // oh, well. this is probably faster anyways. 1036 // following code is generated by substitute_and_alias. 1037 // don't modify it; modify unicode_funs! 1038 version(Python_3_3_Or_Later) { 1039 version(Python_2_6_Or_Later) { 1040 1041 /** Create a Unicode Object from the Py_UNICODE buffer u of the given 1042 size. 1043 1044 u may be NULL which causes the contents to be undefined. It is the 1045 user's responsibility to fill in the needed data afterwards. Note 1046 that modifying the Unicode object contents after construction is 1047 only allowed if u was set to NULL. 1048 1049 The buffer is copied into the new object. 
*/ 1050 /// Availability: >= 2.6 1051 PyObject* PyUnicode_FromUnicode(Py_UNICODE* u, Py_ssize_t size); 1052 1053 /** Similar to PyUnicode_FromUnicode(), but u points to Latin-1 encoded bytes */ 1054 /// Availability: >= 2.6 1055 PyObject* PyUnicode_FromStringAndSize( 1056 const(char)*u, /* char buffer */ 1057 Py_ssize_t size /* size of buffer */ 1058 ); 1059 1060 /** Similar to PyUnicode_FromUnicode(), but u points to null-terminated 1061 Latin-1 encoded bytes */ 1062 /// Availability: >= 2.6 1063 PyObject* PyUnicode_FromString( 1064 const(char)*u /* string */ 1065 ); 1066 1067 /// Availability: >= 2.6 1068 PyObject* PyUnicode_FromFormatV(const(char)*, va_list); 1069 1070 /// Availability: >= 2.6 1071 PyObject* PyUnicode_FromFormat(const(char)*, ...); 1072 1073 /** Format the object based on the format_spec, as defined in PEP 3101 1074 (Advanced String Formatting). */ 1075 /// Availability: >= 2.6 1076 PyObject* _PyUnicode_FormatAdvanced(PyObject *obj, 1077 Py_UNICODE *format_spec, 1078 Py_ssize_t format_spec_len); 1079 1080 /// Availability: >= 2.6 1081 int PyUnicode_ClearFreeList(); 1082 1083 /** 1084 Params: 1085 string = UTF-7 encoded string 1086 length = size of string 1087 error = error handling 1088 consumed = bytes consumed 1089 */ 1090 /// Availability: >= 2.6 1091 PyObject* PyUnicode_DecodeUTF7Stateful( 1092 const(char)* string, 1093 Py_ssize_t length, 1094 const(char)*errors, 1095 Py_ssize_t *consumed 1096 ); 1097 1098 /** 1099 Params: 1100 string = UTF-32 encoded string 1101 length = size of string 1102 error = error handling 1103 byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit 1104 */ 1105 /// Availability: >= 2.6 1106 PyObject* PyUnicode_DecodeUTF32( 1107 const(char)* string, 1108 Py_ssize_t length, 1109 const(char)*errors, 1110 int *byteorder 1111 ); 1112 1113 /** 1114 Params: 1115 string = UTF-32 encoded string 1116 length = size of string 1117 error = error handling 1118 byteorder = pointer to byteorder to use 
0=native;-1=LE,1=BE; updated on exit
consumed = bytes consumed
*/
/// Availability: >= 2.6
PyObject* PyUnicode_DecodeUTF32Stateful(
    const(char)* string,
    Py_ssize_t length,
    const(char)* errors,
    int* byteorder,
    Py_ssize_t* consumed);

/** Returns a Python string using the UTF-32 encoding in native byte
order. The string always starts with a BOM mark. */
/// Availability: >= 2.6
PyObject* PyUnicode_AsUTF32String(PyObject* unicode);

/** Returns a Python string object holding the UTF-32 encoded value of
the Unicode data.

If byteorder is not 0, output is written according to the following
byte order:

byteorder == -1: little endian
byteorder == 0:  native byte order (writes a BOM mark)
byteorder == 1:  big endian

If byteorder is 0, the output string will always start with the
Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
prepended.
Params:
data = Unicode char buffer
length = number of Py_UNICODE chars to encode
errors = error handling
byteorder = byteorder to use 0=BOM+native;-1=LE,1=BE
*/
/// Availability: >= 2.6
PyObject* PyUnicode_EncodeUTF32(
    const Py_UNICODE* data,
    Py_ssize_t length,
    const(char)* errors,
    int byteorder);

}

/** Return a read-only pointer to the Unicode object's internal
Py_UNICODE buffer. */
Py_UNICODE* PyUnicode_AsUnicode(PyObject* unicode);

/** Get the length of the Unicode object. */
Py_ssize_t PyUnicode_GetSize(PyObject* unicode);

/** Get the maximum ordinal for a Unicode character. */
Py_UNICODE PyUnicode_GetMax();

/** Resize an already allocated Unicode object to the new size length.

_*unicode is modified to point to the new (resized) object and 0
returned on success.
This API may only be called by the function which also called the
Unicode constructor. The refcount on the object must be 1. Otherwise,
an error is returned.

Error handling is implemented as follows: an exception is set, -1
is returned and *unicode left untouched.
Params:
unicode = pointer to the new unicode object.
length = New length.
*/
int PyUnicode_Resize(PyObject** unicode, Py_ssize_t length);

/** Coerce obj to an Unicode object and return a reference with
_*incremented* refcount.

Coercion is done in the following way:

1. String and other char buffer compatible objects are decoded
   under the assumptions that they contain data using the current
   default encoding. Decoding is done in "strict" mode.

2. All other objects (including Unicode objects) raise an
   exception.

The API returns NULL in case of an error. The caller is responsible
for decref'ing the returned objects.
*/
PyObject* PyUnicode_FromEncodedObject(
    PyObject* obj,
    const(char)* encoding,
    const(char)* errors);

/** Coerce obj to an Unicode object and return a reference with
_*incremented* refcount.

Unicode objects are passed back as-is (subclasses are converted to
true Unicode objects), all other objects are delegated to
PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in
using the default encoding as basis for decoding the object.

The API returns NULL in case of an error. The caller is responsible
for decref'ing the returned objects.
*/
PyObject* PyUnicode_FromObject(PyObject* obj);

/** Create a Unicode Object from the wchar_t buffer w of the given
size.

The buffer is copied into the new object.
*/
PyObject* PyUnicode_FromWideChar(const(wchar)* w, Py_ssize_t size);

// NOTE(review): D's wchar is always 16 bits wide while C's wchar_t is
// platform dependent -- confirm the two agree on every targeted platform.
/** Copies the Unicode Object contents into the wchar_t buffer w. At
most size wchar_t characters are copied.

Note that the resulting wchar_t string may or may not be
0-terminated. It is the responsibility of the caller to make sure
that the wchar_t string is 0-terminated in case this is required by
the application.

Returns the number of wchar_t characters copied (excluding a
possibly trailing 0-termination character) or -1 in case of an
error.
Params:
unicode = Unicode object whose contents are copied
w = destination buffer; it is written to, so it must be mutable
size = maximum number of wchar_t characters to copy
*/
Py_ssize_t PyUnicode_AsWideChar(
    PyUnicodeObject* unicode,
    wchar* w,
    Py_ssize_t size);

/** Create a Unicode Object from the given Unicode code point ordinal.

The ordinal must be in range(0x10000) on narrow Python builds
(UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is
raised in case it is not.
*/
PyObject* PyUnicode_FromOrdinal(int ordinal);

/** Return a Python string holding the default encoded value of the
Unicode object.

The resulting string is cached in the Unicode object for subsequent
usage by this function. The cached version is needed to implement
the character buffer interface and will live (at least) as long as
the Unicode object itself.

The refcount of the string is *not* incremented.

_*** Exported for internal use by the interpreter only !!! ***
*/
PyObject* _PyUnicode_AsDefaultEncodedString(PyObject*, const(char)*);

/** Returns the currently active default encoding.

The default encoding is currently implemented as run-time settable
process global. This may change in future versions of the
interpreter to become a parameter which is managed on a per-thread
basis.
*/
const(char)* PyUnicode_GetDefaultEncoding();

/** Sets the currently active default encoding.

Returns 0 on success, -1 in case of an error.
*/
int PyUnicode_SetDefaultEncoding(const(char)* encoding);

/** Create a Unicode object by decoding the encoded string s of the
given size.
Params:
s = encoded string
size = size of buffer
encoding = encoding
errors = error handling
*/
PyObject* PyUnicode_Decode(
    const(char)* s,
    Py_ssize_t size,
    const(char)* encoding,
    const(char)* errors);

version(Python_3_0_Or_Later) {
/** Decode a Unicode object unicode and return the result as Python
object. */
/// Availability: 3.*
PyObject* PyUnicode_AsDecodedObject(
    PyObject* unicode,
    const(char)* encoding,
    const(char)* errors);

/** Decode a Unicode object unicode and return the result as Unicode
object. */
/// Availability: 3.*
PyObject* PyUnicode_AsDecodedUnicode(
    PyObject* unicode,
    const(char)* encoding,
    const(char)* errors);

}

/** Encodes a Py_UNICODE buffer of the given size and returns a
Python string object.
Params:
s = Unicode char buffer
size = number of Py_UNICODE chars to encode
encoding = encoding
errors = error handling
*/
PyObject* PyUnicode_Encode(
    Py_UNICODE* s,
    Py_ssize_t size,
    const(char)* encoding,
    const(char)* errors);

/** Encodes a Unicode object and returns the result as Python object.
*/
PyObject* PyUnicode_AsEncodedObject(
    PyObject* unicode,
    const(char)* encoding,
    const(char)* errors);

/** Encodes a Unicode object and returns the result as Python string
object.
*/
PyObject* PyUnicode_AsEncodedString(
    PyObject* unicode,
    const(char)* encoding,
    const(char)* errors);

version(Python_3_0_Or_Later) {
/** Encodes a Unicode object and returns the result as Unicode
object. */
/// Availability: 3.*
PyObject* PyUnicode_AsEncodedUnicode(
    PyObject* unicode,
    const(char)* encoding,
    const(char)* errors);

}

/**
Params:
string = UTF-7 encoded string
length = size of string
errors = error handling
*/
PyObject* PyUnicode_DecodeUTF7(const(char)* string, Py_ssize_t length, const(char)* errors);

/**
Params:
data = Unicode char buffer
length = number of Py_UNICODE chars to encode
encodeSetO = Encode RFC2152 Set O characters in base64
encodeWhiteSpace = Encode whitespace (sp, ht, nl, cr) in base64
errors = error handling
*/
PyObject* PyUnicode_EncodeUTF7(
    Py_UNICODE* data,
    Py_ssize_t length,
    int encodeSetO,
    int encodeWhiteSpace,
    const(char)* errors);

/// _
PyObject* PyUnicode_DecodeUTF8(const(char)* string, Py_ssize_t length, const(char)* errors);

/// _
PyObject* PyUnicode_DecodeUTF8Stateful(
    const(char)* string,
    Py_ssize_t length,
    const(char)* errors,
    Py_ssize_t* consumed);

/// _
PyObject* PyUnicode_AsUTF8String(PyObject* unicode);

/// _
PyObject* PyUnicode_EncodeUTF8(Py_UNICODE* data, Py_ssize_t length, const(char)* errors);

/** Decodes length bytes from a UTF-16 encoded buffer string and returns
the corresponding Unicode object.

errors (if non-NULL) defines the error handling. It defaults
to "strict".
If byteorder is non-NULL, the decoder starts decoding using the
given byte order:

*byteorder == -1: little endian
*byteorder == 0:  native order
*byteorder == 1:  big endian

In native mode, the first two bytes of the stream are checked for a
BOM mark. If found, the BOM mark is analysed, the byte order
adjusted and the BOM skipped. In the other modes, no BOM mark
interpretation is done. After completion, *byteorder is set to the
current byte order at the end of input data.

If byteorder is NULL, the codec starts in native order mode.
*/
PyObject* PyUnicode_DecodeUTF16(
    const(char)* string,
    Py_ssize_t length,
    const(char)* errors,
    int* byteorder);

/**
Params:
string = UTF-16 encoded string
length = size of string
errors = error handling
byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit
consumed = bytes consumed
*/
PyObject* PyUnicode_DecodeUTF16Stateful(
    const(char)* string,
    Py_ssize_t length,
    const(char)* errors,
    int* byteorder,
    Py_ssize_t* consumed);

/** Returns a Python string using the UTF-16 encoding in native byte
order. The string always starts with a BOM mark. */
PyObject* PyUnicode_AsUTF16String(PyObject* unicode);

/** Returns a Python string object holding the UTF-16 encoded value of
the Unicode data.

If byteorder is not 0, output is written according to the following
byte order:

byteorder == -1: little endian
byteorder == 0:  native byte order (writes a BOM mark)
byteorder == 1:  big endian

If byteorder is 0, the output string will always start with the
Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
prepended.

Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
UCS-2.
This trick makes it possible to add full UTF-16 capabilities
at a later point without compromising the APIs.
*/
PyObject* PyUnicode_EncodeUTF16(
    Py_UNICODE* data,
    Py_ssize_t length,
    const(char)* errors,
    int byteorder);

/// _
PyObject* PyUnicode_DecodeUnicodeEscape(
    const(char)* string,
    Py_ssize_t length,
    const(char)* errors);

/// _
PyObject* PyUnicode_AsUnicodeEscapeString(PyObject* unicode);

/// _
PyObject* PyUnicode_EncodeUnicodeEscape(Py_UNICODE* data, Py_ssize_t length);

/**
Params:
string = Raw-Unicode-Escape encoded string
length = size of string
errors = error handling
*/
PyObject* PyUnicode_DecodeRawUnicodeEscape(
    const(char)* string,
    Py_ssize_t length,
    const(char)* errors);

/// _
PyObject* PyUnicode_AsRawUnicodeEscapeString(PyObject* unicode);

/// _
PyObject* PyUnicode_EncodeRawUnicodeEscape(Py_UNICODE* data, Py_ssize_t length);

/// _
PyObject* _PyUnicode_DecodeUnicodeInternal(
    const(char)* string,
    Py_ssize_t length,
    const(char)* errors);

/**
Params:
string = Latin-1 encoded string
length = size of string
errors = error handling
*/
PyObject* PyUnicode_DecodeLatin1(const(char)* string, Py_ssize_t length, const(char)* errors);

/// _
PyObject* PyUnicode_AsLatin1String(PyObject* unicode);

/**
Params:
data = Unicode char buffer
length = Number of Py_UNICODE chars to encode
errors = error handling
*/
PyObject* PyUnicode_EncodeLatin1(Py_UNICODE* data, Py_ssize_t length, const(char)* errors);

/**
Params:
string = ASCII encoded string
length = size of string
errors = error handling
*/
PyObject* PyUnicode_DecodeASCII(
const(char)* string, Py_ssize_t length, const(char)* errors);

/// _
PyObject* PyUnicode_AsASCIIString(PyObject* unicode);

/**
Params:
data = Unicode char buffer
length = Number of Py_UNICODE chars to encode
errors = error handling
*/
PyObject* PyUnicode_EncodeASCII(Py_UNICODE* data, Py_ssize_t length, const(char)* errors);

/**
Params:
string = Encoded string
length = size of string
mapping = character mapping (char ordinal -> unicode ordinal)
errors = error handling
*/
PyObject* PyUnicode_DecodeCharmap(
    const(char)* string,
    Py_ssize_t length,
    PyObject* mapping,
    const(char)* errors);

/**
Params:
unicode = Unicode object
mapping = character mapping (unicode ordinal -> char ordinal)
*/
PyObject* PyUnicode_AsCharmapString(PyObject* unicode, PyObject* mapping);

/**
Params:
data = Unicode char buffer
length = Number of Py_UNICODE chars to encode
mapping = character mapping (unicode ordinal -> char ordinal)
errors = error handling
*/
PyObject* PyUnicode_EncodeCharmap(
    Py_UNICODE* data,
    Py_ssize_t length,
    PyObject* mapping,
    const(char)* errors);

/** Translate a Py_UNICODE buffer of the given length by applying a
character mapping table to it and return the resulting Unicode
object.

The mapping table must map Unicode ordinal integers to Unicode
ordinal integers or None (causing deletion of the character).

Mapping tables may be dictionaries or sequences. Unmapped character
ordinals (ones which cause a LookupError) are left untouched and
are copied as-is.
*/
PyObject* PyUnicode_TranslateCharmap(
    Py_UNICODE* data,
    Py_ssize_t length,
    PyObject* table,
    const(char)* errors);

version (Windows) {
/// Availability: Windows only
PyObject* PyUnicode_DecodeMBCS(const(char)* string, Py_ssize_t length, const(char)* errors);

/// Availability: Windows only
PyObject* PyUnicode_AsMBCSString(PyObject* unicode);

/// Availability: Windows only
PyObject* PyUnicode_EncodeMBCS(Py_UNICODE* data, Py_ssize_t length, const(char)* errors);

}
/** Takes a Unicode string holding a decimal value and writes it into
an output buffer using standard ASCII digit codes.

The output buffer has to provide at least length+1 bytes of storage
area. The output string is 0-terminated.

The encoder converts whitespace to ' ', decimal characters to their
corresponding ASCII digit and all other Latin-1 characters except
\0 as-is. Characters outside this range (Unicode ordinals 1-256)
are treated as errors. This includes embedded NULL bytes.

Error handling is defined by the errors argument:

NULL or "strict": raise a ValueError
"ignore": ignore the wrong characters (these are not copied to the
          output buffer)
"replace": replaces illegal characters with '?'

Returns 0 on success, -1 on failure.
*/
int PyUnicode_EncodeDecimal(
    Py_UNICODE* s,
    Py_ssize_t length,
    char* output,
    const(char)* errors);

/** Concat two strings giving a new Unicode string.
*/
PyObject* PyUnicode_Concat(PyObject* left, PyObject* right);

version(Python_3_0_Or_Later) {
/** Concat two strings and put the result in *pleft
(sets *pleft to NULL on error)
Params:
pleft = Pointer to left string
right = Right string
*/
/// Availability: 3.*
void PyUnicode_Append(PyObject** pleft, PyObject* right);

/** Concat two strings, put the result in *pleft and drop the right object
(sets *pleft to NULL on error)
Params:
pleft = Pointer to left string
right = Right string; dropped by the call
*/
/// Availability: 3.*
void PyUnicode_AppendAndDel(PyObject** pleft, PyObject* right);

}

/** Split a string giving a list of Unicode strings.

If sep is NULL, splitting will be done at all whitespace
substrings. Otherwise, splits occur at the given separator.

At most maxsplit splits will be done. If negative, no limit is set.

Separators are not included in the resulting list.
*/
PyObject* PyUnicode_Split(PyObject* s, PyObject* sep, Py_ssize_t maxsplit);

/** Ditto PyUnicode_Split, but split at line breaks.

CRLF is considered to be one line break. Line breaks are not
included in the resulting list. */
PyObject* PyUnicode_Splitlines(PyObject* s, int keepends);

version(Python_2_5_Or_Later) {
/** Partition a string using a given separator. */
/// Availability: >= 2.5
PyObject* PyUnicode_Partition(PyObject* s, PyObject* sep);

/** Partition a string using a given separator, searching from the end
of the string. */
/// Availability: >= 2.5
PyObject* PyUnicode_RPartition(PyObject* s, PyObject* sep);

}

/** Split a string giving a list of Unicode strings.

If sep is NULL, splitting will be done at all whitespace
substrings.
Otherwise, splits occur at the given separator.

At most maxsplit splits will be done. But unlike PyUnicode_Split
PyUnicode_RSplit splits from the end of the string. If negative,
no limit is set.

Separators are not included in the resulting list.
*/
PyObject* PyUnicode_RSplit(PyObject* s, PyObject* sep, Py_ssize_t maxsplit);

/** Translate a string by applying a character mapping table to it and
return the resulting Unicode object.

The mapping table must map Unicode ordinal integers to Unicode
ordinal integers or None (causing deletion of the character).

Mapping tables may be dictionaries or sequences. Unmapped character
ordinals (ones which cause a LookupError) are left untouched and
are copied as-is.
*/
PyObject* PyUnicode_Translate(PyObject* str, PyObject* table, const(char)* errors);

/** Join a sequence of strings using the given separator and return
the resulting Unicode string. */
PyObject* PyUnicode_Join(PyObject* separator, PyObject* seq);

/** Return 1 if substr matches str[start:end] at the given tail end, 0
otherwise. */
Py_ssize_t PyUnicode_Tailmatch(
    PyObject* str,
    PyObject* substr,
    Py_ssize_t start,
    Py_ssize_t end,
    int direction);

/** Return the first position of substr in str[start:end] using the
given search direction or -1 if not found. -2 is returned in case
an error occurred and an exception is set. */
Py_ssize_t PyUnicode_Find(
    PyObject* str,
    PyObject* substr,
    Py_ssize_t start,
    Py_ssize_t end,
    int direction);

/** Count the number of occurrences of substr in str[start:end].
*/
Py_ssize_t PyUnicode_Count(
    PyObject* str,
    PyObject* substr,
    Py_ssize_t start,
    Py_ssize_t end);

/** Replace at most maxcount occurrences of substr in str with replstr
and return the resulting Unicode object. */
PyObject* PyUnicode_Replace(
    PyObject* str,
    PyObject* substr,
    PyObject* replstr,
    Py_ssize_t maxcount);

/** Compare two strings and return -1, 0, 1 for less than, equal,
greater than resp. */
int PyUnicode_Compare(PyObject* left, PyObject* right);

version(Python_3_0_Or_Later) {
/** Compare two strings and return -1, 0, 1 for less than, equal,
greater than resp.
Params:
left = Unicode object
right = ASCII-encoded string
*/
/// Availability: 3.*
int PyUnicode_CompareWithASCIIString(PyObject* left, const(char)* right);
}

version(Python_2_5_Or_Later) {
/** Rich compare two strings and return one of the following:

- NULL in case an exception was raised
- Py_True or Py_False for successful comparisons
- Py_NotImplemented in case the type combination is unknown

Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
case the conversion of the arguments to Unicode fails with a
UnicodeDecodeError.

Possible values for op:

Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
*/
/// Availability: >= 2.5
PyObject* PyUnicode_RichCompare(PyObject* left, PyObject* right, int op);
}

/** Apply an argument tuple or dictionary to a format string and return
the resulting Unicode string. */
PyObject* PyUnicode_Format(PyObject* format, PyObject* args);

/** Checks whether element is contained in container and return 1/0
accordingly.

element has to coerce to an one element Unicode string. -1 is
returned in case of an error.
*/
int PyUnicode_Contains(PyObject* container, PyObject* element);

version(Python_3_0_Or_Later) {
/** Checks whether argument is a valid identifier. */
/// Availability: 3.*
int PyUnicode_IsIdentifier(PyObject* s);
}

/// _
int _PyUnicode_IsLowercase(Py_UNICODE ch);
/// _
int _PyUnicode_IsUppercase(Py_UNICODE ch);
/// _
int _PyUnicode_IsTitlecase(Py_UNICODE ch);
/// _
int _PyUnicode_IsWhitespace(Py_UNICODE ch);
/// _
int _PyUnicode_IsLinebreak(Py_UNICODE ch);
/// _
Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch);
/// _
Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch);
/// _
Py_UNICODE _PyUnicode_ToTitlecase(Py_UNICODE ch);
/// _
int _PyUnicode_ToDecimalDigit(Py_UNICODE ch);
/// _
int _PyUnicode_ToDigit(Py_UNICODE ch);
/// _
double _PyUnicode_ToNumeric(Py_UNICODE ch);
/// _
int _PyUnicode_IsDecimalDigit(Py_UNICODE ch);
/// _
int _PyUnicode_IsDigit(Py_UNICODE ch);
/// _
int _PyUnicode_IsNumeric(Py_UNICODE ch);
/// _
int _PyUnicode_IsAlpha(Py_UNICODE ch);

}else version(Python_Unicode_UCS2) {

version(Python_2_6_Or_Later) {

/** Create a Unicode Object from the Py_UNICODE buffer u of the given
size.

u may be NULL which causes the contents to be undefined. It is the
user's responsibility to fill in the needed data afterwards. Note
that modifying the Unicode object contents after construction is
only allowed if u was set to NULL.

The buffer is copied into the new object.
*/
/// Availability: >= 2.6
PyObject* PyUnicodeUCS2_FromUnicode(Py_UNICODE* u, Py_ssize_t size);
/// ditto
alias PyUnicodeUCS2_FromUnicode PyUnicode_FromUnicode;

/** Similar to PyUnicode_FromUnicode(), but u points to Latin-1 encoded bytes.
Params:
u = char buffer
size = size of buffer
*/
/// Availability: >= 2.6
PyObject* PyUnicodeUCS2_FromStringAndSize(const(char)* u, Py_ssize_t size);
/// ditto
alias PyUnicodeUCS2_FromStringAndSize PyUnicode_FromStringAndSize;

/** Similar to PyUnicode_FromUnicode(), but u points to null-terminated
Latin-1 encoded bytes.
Params:
u = string
*/
/// Availability: >= 2.6
PyObject* PyUnicodeUCS2_FromString(const(char)* u);
/// ditto
alias PyUnicodeUCS2_FromString PyUnicode_FromString;

/// Availability: >= 2.6
PyObject* PyUnicodeUCS2_FromFormatV(const(char)*, va_list);
/// ditto
alias PyUnicodeUCS2_FromFormatV PyUnicode_FromFormatV;

/// Availability: >= 2.6
PyObject* PyUnicodeUCS2_FromFormat(const(char)*, ...);
/// ditto
alias PyUnicodeUCS2_FromFormat PyUnicode_FromFormat;

/** Format the object based on the format_spec, as defined in PEP 3101
(Advanced String Formatting).
*/
/// Availability: >= 2.6
PyObject* _PyUnicodeUCS2_FormatAdvanced(
    PyObject* obj,
    Py_UNICODE* format_spec,
    Py_ssize_t format_spec_len);
/// ditto
alias _PyUnicodeUCS2_FormatAdvanced _PyUnicode_FormatAdvanced;

/// Availability: >= 2.6
int PyUnicodeUCS2_ClearFreeList();
/// ditto
alias PyUnicodeUCS2_ClearFreeList PyUnicode_ClearFreeList;

/**
Params:
string = UTF-7 encoded string
length = size of string
errors = error handling
consumed = bytes consumed
*/
/// Availability: >= 2.6
PyObject* PyUnicodeUCS2_DecodeUTF7Stateful(
    const(char)* string,
    Py_ssize_t length,
    const(char)* errors,
    Py_ssize_t* consumed);
/// ditto
alias PyUnicodeUCS2_DecodeUTF7Stateful PyUnicode_DecodeUTF7Stateful;

/**
Params:
string = UTF-32 encoded string
length = size of string
errors = error handling
byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit
*/
/// Availability: >= 2.6
PyObject* PyUnicodeUCS2_DecodeUTF32(
    const(char)* string,
    Py_ssize_t length,
    const(char)* errors,
    int* byteorder);
/// ditto
alias PyUnicodeUCS2_DecodeUTF32 PyUnicode_DecodeUTF32;

/**
Params:
string = UTF-32 encoded string
length = size of string
errors = error handling
byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit
consumed = bytes consumed
*/
/// Availability: >= 2.6
PyObject* PyUnicodeUCS2_DecodeUTF32Stateful(
    const(char)* string,
    Py_ssize_t length,
    const(char)* errors,
    int* byteorder,
    Py_ssize_t* consumed);
/// ditto
alias PyUnicodeUCS2_DecodeUTF32Stateful PyUnicode_DecodeUTF32Stateful;

/** Returns a Python string using the UTF-32 encoding in native byte
order. The string always starts with a BOM mark.
*/
/// Availability: >= 2.6
PyObject* PyUnicodeUCS2_AsUTF32String(PyObject* unicode);
/// ditto
alias PyUnicodeUCS2_AsUTF32String PyUnicode_AsUTF32String;

/** Returns a Python string object holding the UTF-32 encoded value of
the Unicode data.

If byteorder is not 0, output is written according to the following
byte order:

byteorder == -1: little endian
byteorder == 0:  native byte order (writes a BOM mark)
byteorder == 1:  big endian

If byteorder is 0, the output string will always start with the
Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
prepended.
Params:
data = Unicode char buffer
length = number of Py_UNICODE chars to encode
errors = error handling
byteorder = byteorder to use 0=BOM+native;-1=LE,1=BE
*/
/// Availability: >= 2.6
PyObject* PyUnicodeUCS2_EncodeUTF32(
    const Py_UNICODE* data,
    Py_ssize_t length,
    const(char)* errors,
    int byteorder);
/// ditto
alias PyUnicodeUCS2_EncodeUTF32 PyUnicode_EncodeUTF32;

}

/** Return a read-only pointer to the Unicode object's internal
Py_UNICODE buffer. */
Py_UNICODE* PyUnicodeUCS2_AsUnicode(PyObject* unicode);
/// ditto
alias PyUnicodeUCS2_AsUnicode PyUnicode_AsUnicode;

/** Get the length of the Unicode object. */
Py_ssize_t PyUnicodeUCS2_GetSize(PyObject* unicode);
/// ditto
alias PyUnicodeUCS2_GetSize PyUnicode_GetSize;

/** Get the maximum ordinal for a Unicode character. */
Py_UNICODE PyUnicodeUCS2_GetMax();
/// ditto
alias PyUnicodeUCS2_GetMax PyUnicode_GetMax;

/** Resize an already allocated Unicode object to the new size length.

_*unicode is modified to point to the new (resized) object and 0
returned on success.
This API may only be called by the function which also called the
Unicode constructor. The refcount on the object must be 1. Otherwise,
an error is returned.

Error handling is implemented as follows: an exception is set, -1
is returned and *unicode left untouched.
Params:
unicode = pointer to the new unicode object.
length = New length.
*/
int PyUnicodeUCS2_Resize(PyObject** unicode, Py_ssize_t length);
/// ditto
alias PyUnicodeUCS2_Resize PyUnicode_Resize;

/** Coerce obj to an Unicode object and return a reference with
_*incremented* refcount.

Coercion is done in the following way:

1. String and other char buffer compatible objects are decoded
   under the assumptions that they contain data using the current
   default encoding. Decoding is done in "strict" mode.

2. All other objects (including Unicode objects) raise an
   exception.

The API returns NULL in case of an error. The caller is responsible
for decref'ing the returned objects.
*/
PyObject* PyUnicodeUCS2_FromEncodedObject(
    PyObject* obj,
    const(char)* encoding,
    const(char)* errors);
/// ditto
alias PyUnicodeUCS2_FromEncodedObject PyUnicode_FromEncodedObject;

/** Coerce obj to an Unicode object and return a reference with
_*incremented* refcount.

Unicode objects are passed back as-is (subclasses are converted to
true Unicode objects), all other objects are delegated to
PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in
using the default encoding as basis for decoding the object.

The API returns NULL in case of an error. The caller is responsible
for decref'ing the returned objects.
*/
PyObject* PyUnicodeUCS2_FromObject(PyObject* obj);
/// ditto
alias PyUnicodeUCS2_FromObject PyUnicode_FromObject;

/** Create a Unicode Object from the wchar_t buffer w of the given
size.

The buffer is copied into the new object. */
PyObject* PyUnicodeUCS2_FromWideChar(const(wchar)* w, Py_ssize_t size);
/// ditto
alias PyUnicodeUCS2_FromWideChar PyUnicode_FromWideChar;

// NOTE(review): D's wchar is always 16 bits wide while C's wchar_t is
// platform dependent -- confirm the two agree on every targeted platform.
/** Copies the Unicode Object contents into the wchar_t buffer w. At
most size wchar_t characters are copied.

Note that the resulting wchar_t string may or may not be
0-terminated. It is the responsibility of the caller to make sure
that the wchar_t string is 0-terminated in case this is required by
the application.

Returns the number of wchar_t characters copied (excluding a
possibly trailing 0-termination character) or -1 in case of an
error.
Params:
unicode = Unicode object whose contents are copied
w = destination buffer; it is written to, so it must be mutable
size = maximum number of wchar_t characters to copy
*/
Py_ssize_t PyUnicodeUCS2_AsWideChar(
    PyUnicodeObject* unicode,
    wchar* w,
    Py_ssize_t size);
/// ditto
alias PyUnicodeUCS2_AsWideChar PyUnicode_AsWideChar;

/** Create a Unicode Object from the given Unicode code point ordinal.

The ordinal must be in range(0x10000) on narrow Python builds
(UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is
raised in case it is not.
*/
PyObject* PyUnicodeUCS2_FromOrdinal(int ordinal);
/// ditto
alias PyUnicodeUCS2_FromOrdinal PyUnicode_FromOrdinal;

/** Return a Python string holding the default encoded value of the
Unicode object.

The resulting string is cached in the Unicode object for subsequent
usage by this function. The cached version is needed to implement
the character buffer interface and will live (at least) as long as
the Unicode object itself.

The refcount of the string is *not* incremented.
2258 2259 _*** Exported for internal use by the interpreter only !!! *** 2260 2261 */ 2262 PyObject* _PyUnicodeUCS2_AsDefaultEncodedString(PyObject *, const(char)*); 2263 /// ditto 2264 2265 alias _PyUnicodeUCS2_AsDefaultEncodedString _PyUnicode_AsDefaultEncodedString; 2266 2267 2268 /** Returns the currently active default encoding. 2269 2270 The default encoding is currently implemented as run-time settable 2271 process global. This may change in future versions of the 2272 interpreter to become a parameter which is managed on a per-thread 2273 basis. 2274 2275 */ 2276 const(char)* PyUnicodeUCS2_GetDefaultEncoding(); 2277 /// ditto 2278 2279 alias PyUnicodeUCS2_GetDefaultEncoding PyUnicode_GetDefaultEncoding; 2280 2281 2282 /** Sets the currently active default encoding. 2283 2284 Returns 0 on success, -1 in case of an error. 2285 2286 */ 2287 int PyUnicodeUCS2_SetDefaultEncoding(const(char)*encoding); 2288 /// ditto 2289 2290 alias PyUnicodeUCS2_SetDefaultEncoding PyUnicode_SetDefaultEncoding; 2291 2292 2293 /** Create a Unicode object by decoding the encoded string s of the 2294 given size. 2295 Params: 2296 s = encoded string 2297 size = size of buffer 2298 encoding = encoding 2299 errors = error handling 2300 */ 2301 PyObject* PyUnicodeUCS2_Decode( 2302 const(char)* s, 2303 Py_ssize_t size, 2304 const(char)* encoding, 2305 const(char)* errors); 2306 /// ditto 2307 2308 alias PyUnicodeUCS2_Decode PyUnicode_Decode; 2309 2310 2311 version(Python_3_0_Or_Later) { 2312 /** Decode a Unicode object unicode and return the result as Python 2313 object. */ 2314 /// Availability: 3.* 2315 2316 PyObject* PyUnicodeUCS2_AsDecodedObject( 2317 PyObject* unicode, 2318 const(char)* encoding, 2319 const(char)* errors 2320 ); 2321 /// ditto 2322 2323 alias PyUnicodeUCS2_AsDecodedObject PyUnicode_AsDecodedObject; 2324 2325 /** Decode a Unicode object unicode and return the result as Unicode 2326 object. 
*/ 2327 /// Availability: 3.* 2328 2329 PyObject* PyUnicodeUCS2_AsDecodedUnicode( 2330 PyObject* unicode, 2331 const(char)* encoding, 2332 const(char)* errors 2333 ); 2334 /// ditto 2335 2336 alias PyUnicodeUCS2_AsDecodedUnicode PyUnicode_AsDecodedUnicode; 2337 2338 } 2339 2340 /** Encodes a Py_UNICODE buffer of the given size and returns a 2341 Python string object. 2342 Params: 2343 s = Unicode char buffer 2344 size = number of Py_UNICODE chars to encode 2345 encoding = encoding 2346 errors = error handling 2347 */ 2348 PyObject* PyUnicodeUCS2_Encode( 2349 Py_UNICODE* s, 2350 Py_ssize_t size, 2351 const(char)* encoding, 2352 const(char)* errors); 2353 /// ditto 2354 2355 alias PyUnicodeUCS2_Encode PyUnicode_Encode; 2356 2357 2358 /** Encodes a Unicode object and returns the result as Python object. 2359 */ 2360 PyObject* PyUnicodeUCS2_AsEncodedObject( 2361 PyObject* unicode, 2362 const(char)* encoding, 2363 const(char)* errors); 2364 /// ditto 2365 2366 alias PyUnicodeUCS2_AsEncodedObject PyUnicode_AsEncodedObject; 2367 2368 2369 /** Encodes a Unicode object and returns the result as Python string 2370 object. */ 2371 PyObject* PyUnicodeUCS2_AsEncodedString( 2372 PyObject* unicode, 2373 const(char)* encoding, 2374 const(char)* errors); 2375 /// ditto 2376 2377 alias PyUnicodeUCS2_AsEncodedString PyUnicode_AsEncodedString; 2378 2379 2380 version(Python_3_0_Or_Later) { 2381 /** Encodes a Unicode object and returns the result as Unicode 2382 object. 
*/ 2383 /// Availability: >= 3.* 2384 PyObject* PyUnicodeUCS2_AsEncodedUnicode( 2385 PyObject* unicode, 2386 const(char)* encoding, 2387 const(char)* errors 2388 ); 2389 /// ditto 2390 2391 alias PyUnicodeUCS2_AsEncodedUnicode PyUnicode_AsEncodedUnicode; 2392 2393 } 2394 2395 /** 2396 Params: 2397 string = UTF-7 encoded string 2398 length = size of string 2399 errors = error handling 2400 */ 2401 PyObject* PyUnicodeUCS2_DecodeUTF7( 2402 const(char)* string, 2403 Py_ssize_t length, 2404 const(char)* errors); 2405 /// ditto 2406 2407 alias PyUnicodeUCS2_DecodeUTF7 PyUnicode_DecodeUTF7; 2408 2409 2410 /** 2411 Params: 2412 data = Unicode char buffer 2413 length = number of Py_UNICODE chars to encode 2414 encodeSetO = Encode RFC2152 Set O characters in base64 2415 encodeWhiteSpace = Encode whitespace (sp, ht, nl, cr) in base64 2416 errors = error handling 2417 */ 2418 PyObject* PyUnicodeUCS2_EncodeUTF7( 2419 Py_UNICODE* data, 2420 Py_ssize_t length, 2421 int encodeSetO, 2422 int encodeWhiteSpace, 2423 const(char)* errors 2424 ); 2425 /// ditto 2426 2427 alias PyUnicodeUCS2_EncodeUTF7 PyUnicode_EncodeUTF7; 2428 2429 2430 /// _ 2431 PyObject* PyUnicodeUCS2_DecodeUTF8( 2432 const(char)* string, 2433 Py_ssize_t length, 2434 const(char)* errors); 2435 /// ditto 2436 2437 alias PyUnicodeUCS2_DecodeUTF8 PyUnicode_DecodeUTF8; 2438 2439 /// _ 2440 PyObject* PyUnicodeUCS2_DecodeUTF8Stateful( 2441 const(char)* string, 2442 Py_ssize_t length, 2443 const(char)* errors, 2444 Py_ssize_t* consumed 2445 ); 2446 /// ditto 2447 2448 alias PyUnicodeUCS2_DecodeUTF8Stateful PyUnicode_DecodeUTF8Stateful; 2449 2450 /// _ 2451 PyObject* PyUnicodeUCS2_AsUTF8String(PyObject* unicode); 2452 /// ditto 2453 2454 alias PyUnicodeUCS2_AsUTF8String PyUnicode_AsUTF8String; 2455 2456 /// _ 2457 PyObject* PyUnicodeUCS2_EncodeUTF8( 2458 Py_UNICODE* data, 2459 Py_ssize_t length, 2460 const(char) *errors); 2461 /// ditto 2462 2463 alias PyUnicodeUCS2_EncodeUTF8 PyUnicode_EncodeUTF8; 2464 2465 2466 /**
Decodes length bytes from a UTF-16 encoded buffer string and returns 2467 the corresponding Unicode object. 2468 2469 errors (if non-NULL) defines the error handling. It defaults 2470 to "strict". 2471 2472 If byteorder is non-NULL, the decoder starts decoding using the 2473 given byte order: 2474 2475 *byteorder == -1: little endian 2476 *byteorder == 0: native order 2477 *byteorder == 1: big endian 2478 2479 In native mode, the first two bytes of the stream are checked for a 2480 BOM mark. If found, the BOM mark is analysed, the byte order 2481 adjusted and the BOM skipped. In the other modes, no BOM mark 2482 interpretation is done. After completion, *byteorder is set to the 2483 current byte order at the end of input data. 2484 2485 If byteorder is NULL, the codec starts in native order mode. 2486 2487 */ 2488 PyObject* PyUnicodeUCS2_DecodeUTF16( 2489 const(char)* string, 2490 Py_ssize_t length, 2491 const(char)* errors, 2492 int* byteorder); 2493 /// ditto 2494 2495 alias PyUnicodeUCS2_DecodeUTF16 PyUnicode_DecodeUTF16; 2496 2497 /** 2498 Params: 2499 string = UTF-16 encoded string 2500 length = size of string 2501 errors = error handling 2502 byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit 2503 consumed = bytes consumed 2504 */ 2505 PyObject* PyUnicodeUCS2_DecodeUTF16Stateful( 2506 const(char)* string, 2507 Py_ssize_t length, 2508 const(char)* errors, 2509 int* byteorder, 2510 Py_ssize_t* consumed 2511 ); 2512 /// ditto 2513 2514 alias PyUnicodeUCS2_DecodeUTF16Stateful PyUnicode_DecodeUTF16Stateful; 2515 2516 /** Returns a Python string using the UTF-16 encoding in native byte 2517 order. The string always starts with a BOM mark. */ 2518 PyObject* PyUnicodeUCS2_AsUTF16String(PyObject *unicode); 2519 /// ditto 2520 2521 alias PyUnicodeUCS2_AsUTF16String PyUnicode_AsUTF16String; 2522 2523 /** Returns a Python string object holding the UTF-16 encoded value of 2524 the Unicode data. 
2525 2526 If byteorder is not 0, output is written according to the following 2527 byte order: 2528 2529 byteorder == -1: little endian 2530 byteorder == 0: native byte order (writes a BOM mark) 2531 byteorder == 1: big endian 2532 2533 If byteorder is 0, the output string will always start with the 2534 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is 2535 prepended. 2536 2537 Note that Py_UNICODE data is being interpreted as UTF-16 reduced to 2538 UCS-2. This trick makes it possible to add full UTF-16 capabilities 2539 at a later point without compromising the APIs. 2540 2541 */ 2542 PyObject* PyUnicodeUCS2_EncodeUTF16( 2543 Py_UNICODE* data, 2544 Py_ssize_t length, 2545 const(char)* errors, 2546 int byteorder 2547 ); 2548 /// ditto 2549 2550 alias PyUnicodeUCS2_EncodeUTF16 PyUnicode_EncodeUTF16; 2551 2552 2553 /// _ 2554 PyObject* PyUnicodeUCS2_DecodeUnicodeEscape( 2555 const(char)* string, 2556 Py_ssize_t length, 2557 const(char)* errors); 2558 /// ditto 2559 2560 alias PyUnicodeUCS2_DecodeUnicodeEscape PyUnicode_DecodeUnicodeEscape; 2561 2562 /// _ 2563 PyObject* PyUnicodeUCS2_AsUnicodeEscapeString( 2564 PyObject* unicode); 2565 /// ditto 2566 2567 alias PyUnicodeUCS2_AsUnicodeEscapeString PyUnicode_AsUnicodeEscapeString; 2568 2569 /// _ 2570 PyObject* PyUnicodeUCS2_EncodeUnicodeEscape( 2571 Py_UNICODE* data, 2572 Py_ssize_t length); 2573 /// ditto 2574 2575 alias PyUnicodeUCS2_EncodeUnicodeEscape PyUnicode_EncodeUnicodeEscape; 2576 2577 /** 2578 Params: 2579 string = Raw-Unicode-Escape encoded string 2580 length = size of string 2581 errors = error handling 2582 */ 2583 PyObject* PyUnicodeUCS2_DecodeRawUnicodeEscape( 2584 const(char)* string, 2585 Py_ssize_t length, 2586 const(char)* errors); 2587 /// ditto 2588 2589 alias PyUnicodeUCS2_DecodeRawUnicodeEscape PyUnicode_DecodeRawUnicodeEscape; 2590 2591 /// _ 2592 PyObject* PyUnicodeUCS2_AsRawUnicodeEscapeString(PyObject* unicode); 2593 /// ditto 2594 2595 alias 
PyUnicodeUCS2_AsRawUnicodeEscapeString PyUnicode_AsRawUnicodeEscapeString; 2596 2597 /// _ 2598 PyObject* PyUnicodeUCS2_EncodeRawUnicodeEscape( 2599 Py_UNICODE* data, Py_ssize_t length); 2600 /// ditto 2601 2602 alias PyUnicodeUCS2_EncodeRawUnicodeEscape PyUnicode_EncodeRawUnicodeEscape; 2603 2604 2605 /// _ 2606 PyObject* _PyUnicodeUCS2_DecodeUnicodeInternal( 2607 const(char)* string, 2608 Py_ssize_t length, 2609 const(char)* errors); 2610 /// ditto 2611 2612 alias _PyUnicodeUCS2_DecodeUnicodeInternal _PyUnicode_DecodeUnicodeInternal; 2613 2614 2615 /** 2616 Params: 2617 string = Latin-1 encoded string 2618 length = size of string 2619 errors = error handling 2620 */ 2621 PyObject* PyUnicodeUCS2_DecodeLatin1( 2622 const(char)* string, 2623 Py_ssize_t length, 2624 const(char)* errors); 2625 /// ditto 2626 2627 alias PyUnicodeUCS2_DecodeLatin1 PyUnicode_DecodeLatin1; 2628 2629 /// _ 2630 PyObject* PyUnicodeUCS2_AsLatin1String(PyObject *unicode); 2631 /// ditto 2632 2633 alias PyUnicodeUCS2_AsLatin1String PyUnicode_AsLatin1String; 2634 2635 /** 2636 Params: 2637 data = Unicode char buffer 2638 length = Number of Py_UNICODE chars to encode 2639 errors = error handling 2640 */ 2641 PyObject* PyUnicodeUCS2_EncodeLatin1( 2642 Py_UNICODE* data, 2643 Py_ssize_t length, 2644 const(char)* errors); 2645 /// ditto 2646 2647 alias PyUnicodeUCS2_EncodeLatin1 PyUnicode_EncodeLatin1; 2648 2649 2650 /** 2651 Params: 2652 string = ASCII encoded string 2653 length = size of string 2654 errors = error handling 2655 */ 2656 PyObject* PyUnicodeUCS2_DecodeASCII( 2657 const(char)* string, 2658 Py_ssize_t length, 2659 const(char)* errors); 2660 /// ditto 2661 2662 alias PyUnicodeUCS2_DecodeASCII PyUnicode_DecodeASCII; 2663 2664 /// _ 2665 PyObject* PyUnicodeUCS2_AsASCIIString(PyObject *unicode); 2666 /// ditto 2667 2668 alias PyUnicodeUCS2_AsASCIIString PyUnicode_AsASCIIString; 2669 2670 /** 2671 Params: 2672 data = Unicode char buffer 2673 length = Number of Py_UNICODE
chars to encode 2674 errors = error handling 2675 */ 2676 PyObject* PyUnicodeUCS2_EncodeASCII( 2677 Py_UNICODE* data, 2678 Py_ssize_t length, 2679 const(char)* errors); 2680 /// ditto 2681 2682 alias PyUnicodeUCS2_EncodeASCII PyUnicode_EncodeASCII; 2683 2684 2685 /** 2686 Params: 2687 string = Encoded string 2688 length = size of string 2689 mapping = character mapping (char ordinal -> unicode ordinal) 2690 errors = error handling 2691 */ 2692 PyObject* PyUnicodeUCS2_DecodeCharmap( 2693 const(char)* string, 2694 Py_ssize_t length, 2695 PyObject* mapping, 2696 const(char)* errors 2697 ); 2698 /// ditto 2699 2700 alias PyUnicodeUCS2_DecodeCharmap PyUnicode_DecodeCharmap; 2701 2702 /** 2703 Params: 2704 unicode = Unicode object 2705 mapping = character mapping (unicode ordinal -> char ordinal) 2706 */ 2707 PyObject* PyUnicodeUCS2_AsCharmapString( 2708 PyObject* unicode, 2709 PyObject* mapping); 2710 /// ditto 2711 2712 alias PyUnicodeUCS2_AsCharmapString PyUnicode_AsCharmapString; 2713 2714 /** 2715 Params: 2716 data = Unicode char buffer 2717 length = Number of Py_UNICODE chars to encode 2718 mapping = character mapping (unicode ordinal -> char ordinal) 2719 errors = error handling 2720 */ 2721 PyObject* PyUnicodeUCS2_EncodeCharmap( 2722 Py_UNICODE* data, 2723 Py_ssize_t length, 2724 PyObject* mapping, 2725 const(char)* errors 2726 ); 2727 /// ditto 2728 2729 alias PyUnicodeUCS2_EncodeCharmap PyUnicode_EncodeCharmap; 2730 2731 /** Translate a Py_UNICODE buffer of the given length by applying a 2732 character mapping table to it and return the resulting Unicode 2733 object. 2734 2735 The mapping table must map Unicode ordinal integers to Unicode 2736 ordinal integers or None (causing deletion of the character). 2737 2738 Mapping tables may be dictionaries or sequences. Unmapped character 2739 ordinals (ones which cause a LookupError) are left untouched and 2740 are copied as-is. 
2741 2742 */ 2743 PyObject* PyUnicodeUCS2_TranslateCharmap( 2744 Py_UNICODE* data, 2745 Py_ssize_t length, 2746 PyObject* table, 2747 const(char)* errors 2748 ); 2749 /// ditto 2750 2751 alias PyUnicodeUCS2_TranslateCharmap PyUnicode_TranslateCharmap; 2752 2753 2754 version (Windows) { 2755 /// Availability: Windows only 2756 PyObject* PyUnicodeUCS2_DecodeMBCS( 2757 const(char)* string, 2758 Py_ssize_t length, 2759 const(char)* errors); 2760 /// ditto 2761 2762 alias PyUnicodeUCS2_DecodeMBCS PyUnicode_DecodeMBCS; 2763 2764 /// Availability: Windows only 2765 PyObject* PyUnicodeUCS2_AsMBCSString(PyObject* unicode); 2766 /// ditto 2767 2768 alias PyUnicodeUCS2_AsMBCSString PyUnicode_AsMBCSString; 2769 2770 /// Availability: Windows only 2771 PyObject* PyUnicodeUCS2_EncodeMBCS( 2772 Py_UNICODE* data, 2773 Py_ssize_t length, 2774 const(char)* errors); 2775 /// ditto 2776 2777 alias PyUnicodeUCS2_EncodeMBCS PyUnicode_EncodeMBCS; 2778 2779 } 2780 /** Takes a Unicode string holding a decimal value and writes it into 2781 an output buffer using standard ASCII digit codes. 2782 2783 The output buffer has to provide at least length+1 bytes of storage 2784 area. The output string is 0-terminated. 2785 2786 The encoder converts whitespace to ' ', decimal characters to their 2787 corresponding ASCII digit and all other Latin-1 characters except 2788 \0 as-is. Characters outside this range (Unicode ordinals 1-256) 2789 are treated as errors. This includes embedded NULL bytes. 2790 2791 Error handling is defined by the errors argument: 2792 2793 NULL or "strict": raise a ValueError 2794 "ignore": ignore the wrong characters (these are not copied to the 2795 output buffer) 2796 "replace": replaces illegal characters with '?' 2797 2798 Returns 0 on success, -1 on failure. 
2799 2800 */ 2801 int PyUnicodeUCS2_EncodeDecimal( 2802 Py_UNICODE* s, 2803 Py_ssize_t length, 2804 char* output, 2805 const(char)* errors); 2806 /// ditto 2807 2808 alias PyUnicodeUCS2_EncodeDecimal PyUnicode_EncodeDecimal; 2809 2810 2811 /** Concat two strings giving a new Unicode string. */ 2812 PyObject* PyUnicodeUCS2_Concat( 2813 PyObject* left, 2814 PyObject* right); 2815 /// ditto 2816 2817 alias PyUnicodeUCS2_Concat PyUnicode_Concat; 2818 2819 2820 version(Python_3_0_Or_Later) { 2821 /** Concat two strings and put the result in *pleft 2822 (sets *pleft to NULL on error) 2823 Params: 2824 pleft = Pointer to left string 2825 right = Right string 2826 */ 2827 /// Availability: 3.* 2828 2829 void PyUnicodeUCS2_Append( 2830 PyObject** pleft, 2831 PyObject* right 2832 ); 2833 /// ditto 2834 2835 alias PyUnicodeUCS2_Append PyUnicode_Append; 2836 2837 2838 /** Concat two strings, put the result in *pleft and drop the right object 2839 (sets *pleft to NULL on error) 2840 Params: 2841 pleft = Pointer to left string 2842 right = Right string (dropped by the call) */ 2843 /// Availability: 3.* 2844 void PyUnicodeUCS2_AppendAndDel( 2845 PyObject** pleft, 2846 PyObject* right 2847 ); 2848 /// ditto 2849 2850 alias PyUnicodeUCS2_AppendAndDel PyUnicode_AppendAndDel; 2851 2852 } 2853 2854 /** Split a string giving a list of Unicode strings. 2855 2856 If sep is NULL, splitting will be done at all whitespace 2857 substrings. Otherwise, splits occur at the given separator. 2858 2859 At most maxsplit splits will be done. If negative, no limit is set. 2860 2861 Separators are not included in the resulting list. 2862 2863 */ 2864 PyObject* PyUnicodeUCS2_Split( 2865 PyObject* s, 2866 PyObject* sep, 2867 Py_ssize_t maxsplit); 2868 /// ditto 2869 2870 alias PyUnicodeUCS2_Split PyUnicode_Split; 2871 2872 2873 /** Ditto PyUnicode_Split, but split at line breaks. 2874 2875 CRLF is considered to be one line break. Line breaks are not 2876 included in the resulting list.
*/ 2877 PyObject* PyUnicodeUCS2_Splitlines( 2878 PyObject* s, 2879 int keepends); 2880 /// ditto 2881 2882 alias PyUnicodeUCS2_Splitlines PyUnicode_Splitlines; 2883 2884 2885 version(Python_2_5_Or_Later) { 2886 /** Partition a string using a given separator. */ 2887 /// Availability: >= 2.5 2888 PyObject* PyUnicodeUCS2_Partition( 2889 PyObject* s, 2890 PyObject* sep 2891 ); 2892 /// ditto 2893 2894 alias PyUnicodeUCS2_Partition PyUnicode_Partition; 2895 2896 2897 /** Partition a string using a given separator, searching from the end 2898 of the string. */ 2899 /// Availability: >= 2.5 2900 PyObject* PyUnicodeUCS2_RPartition( 2901 PyObject* s, 2902 PyObject* sep 2903 ); 2904 /// ditto 2905 2906 alias PyUnicodeUCS2_RPartition PyUnicode_RPartition; 2907 2908 } 2909 2910 /** Split a string giving a list of Unicode strings. 2911 2912 If sep is NULL, splitting will be done at all whitespace 2913 substrings. Otherwise, splits occur at the given separator. 2914 2915 At most maxsplit splits will be done. But unlike PyUnicode_Split, 2916 PyUnicode_RSplit splits from the end of the string. If negative, 2917 no limit is set. 2918 2919 Separators are not included in the resulting list. 2920 2921 */ 2922 PyObject* PyUnicodeUCS2_RSplit( 2923 PyObject* s, 2924 PyObject* sep, 2925 Py_ssize_t maxsplit); 2926 /// ditto 2927 2928 alias PyUnicodeUCS2_RSplit PyUnicode_RSplit; 2929 2930 2931 /** Translate a string by applying a character mapping table to it and 2932 return the resulting Unicode object. 2933 2934 The mapping table must map Unicode ordinal integers to Unicode 2935 ordinal integers or None (causing deletion of the character). 2936 2937 Mapping tables may be dictionaries or sequences. Unmapped character 2938 ordinals (ones which cause a LookupError) are left untouched and 2939 are copied as-is.
2940 2941 */ 2942 PyObject* PyUnicodeUCS2_Translate( 2943 PyObject* str, 2944 PyObject* table, 2945 const(char)* errors); 2946 /// ditto 2947 2948 alias PyUnicodeUCS2_Translate PyUnicode_Translate; 2949 2950 2951 /** Join a sequence of strings using the given separator and return 2952 the resulting Unicode string. */ 2953 PyObject* PyUnicodeUCS2_Join( 2954 PyObject* separator, 2955 PyObject* seq); 2956 /// ditto 2957 2958 alias PyUnicodeUCS2_Join PyUnicode_Join; 2959 2960 2961 /** Return 1 if substr matches str[start:end] at the given tail end, 0 2962 otherwise. */ 2963 Py_ssize_t PyUnicodeUCS2_Tailmatch( 2964 PyObject* str, 2965 PyObject* substr, 2966 Py_ssize_t start, 2967 Py_ssize_t end, 2968 int direction 2969 ); 2970 /// ditto 2971 2972 alias PyUnicodeUCS2_Tailmatch PyUnicode_Tailmatch; 2973 2974 2975 /** Return the first position of substr in str[start:end] using the 2976 given search direction or -1 if not found. -2 is returned in case 2977 an error occurred and an exception is set. */ 2978 Py_ssize_t PyUnicodeUCS2_Find( 2979 PyObject* str, 2980 PyObject* substr, 2981 Py_ssize_t start, 2982 Py_ssize_t end, 2983 int direction 2984 ); 2985 /// ditto 2986 2987 alias PyUnicodeUCS2_Find PyUnicode_Find; 2988 2989 2990 /** Count the number of occurrences of substr in str[start:end]. */ 2991 Py_ssize_t PyUnicodeUCS2_Count( 2992 PyObject* str, 2993 PyObject* substr, 2994 Py_ssize_t start, 2995 Py_ssize_t end); 2996 /// ditto 2997 2998 alias PyUnicodeUCS2_Count PyUnicode_Count; 2999 3000 3001 /** Replace at most maxcount occurrences of substr in str with replstr 3002 and return the resulting Unicode object. */ 3003 PyObject* PyUnicodeUCS2_Replace( 3004 PyObject* str, 3005 PyObject* substr, 3006 PyObject* replstr, 3007 Py_ssize_t maxcount 3008 ); 3009 /// ditto 3010 3011 alias PyUnicodeUCS2_Replace PyUnicode_Replace; 3012 3013 3014 /** Compare two strings and return -1, 0, 1 for less than, equal, 3015 greater than resp. 
*/ 3016 int PyUnicodeUCS2_Compare(PyObject* left, PyObject* right); 3017 /// ditto 3018 3019 alias PyUnicodeUCS2_Compare PyUnicode_Compare; 3020 3021 version(Python_3_0_Or_Later) { 3022 /** Compare two strings and return -1, 0, 1 for less than, equal, 3023 greater than resp. 3024 Params: 3025 left = left-hand string object 3026 right = ASCII-encoded string 3027 */ 3028 /// Availability: 3.* 3029 int PyUnicodeUCS2_CompareWithASCIIString( 3030 PyObject* left, 3031 const(char)* right 3032 ); 3033 /// ditto 3034 3035 alias PyUnicodeUCS2_CompareWithASCIIString PyUnicode_CompareWithASCIIString; 3036 3037 } 3038 3039 version(Python_2_5_Or_Later) { 3040 /** Rich compare two strings and return one of the following: 3041 3042 - NULL in case an exception was raised 3043 - Py_True or Py_False for successful comparisons 3044 - Py_NotImplemented in case the type combination is unknown 3045 3046 Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in 3047 case the conversion of the arguments to Unicode fails with a 3048 UnicodeDecodeError. 3049 3050 Possible values for op: 3051 3052 Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE 3053 3054 */ 3055 /// Availability: >= 2.5 3056 PyObject* PyUnicodeUCS2_RichCompare( 3057 PyObject* left, 3058 PyObject* right, 3059 int op 3060 ); 3061 /// ditto 3062 3063 alias PyUnicodeUCS2_RichCompare PyUnicode_RichCompare; 3064 3065 } 3066 3067 /** Apply an argument tuple or dictionary to a format string and return 3068 the resulting Unicode string. */ 3069 PyObject* PyUnicodeUCS2_Format(PyObject* format, PyObject* args); 3070 /// ditto 3071 3072 alias PyUnicodeUCS2_Format PyUnicode_Format; 3073 3074 3075 /** Checks whether element is contained in container and return 1/0 3076 accordingly. 3077 3078 element has to coerce to a one-element Unicode string. -1 is 3079 returned in case of an error.
*/ 3080 int PyUnicodeUCS2_Contains(PyObject* container, PyObject* element); 3081 /// ditto 3082 3083 alias PyUnicodeUCS2_Contains PyUnicode_Contains; 3084 3085 3086 version(Python_3_0_Or_Later) { 3087 /** Checks whether argument is a valid identifier. */ 3088 /// Availability: 3.* 3089 int PyUnicodeUCS2_IsIdentifier(PyObject* s); 3090 /// ditto 3091 3092 alias PyUnicodeUCS2_IsIdentifier PyUnicode_IsIdentifier; 3093 3094 } 3095 3096 3097 /// _ 3098 int _PyUnicodeUCS2_IsLowercase(Py_UNICODE ch); 3099 /// ditto 3100 3101 alias _PyUnicodeUCS2_IsLowercase _PyUnicode_IsLowercase; 3102 3103 /// _ 3104 int _PyUnicodeUCS2_IsUppercase(Py_UNICODE ch); 3105 /// ditto 3106 3107 alias _PyUnicodeUCS2_IsUppercase _PyUnicode_IsUppercase; 3108 3109 /// _ 3110 int _PyUnicodeUCS2_IsTitlecase(Py_UNICODE ch); 3111 /// ditto 3112 3113 alias _PyUnicodeUCS2_IsTitlecase _PyUnicode_IsTitlecase; 3114 3115 /// _ 3116 int _PyUnicodeUCS2_IsWhitespace(Py_UNICODE ch); 3117 /// ditto 3118 3119 alias _PyUnicodeUCS2_IsWhitespace _PyUnicode_IsWhitespace; 3120 3121 /// _ 3122 int _PyUnicodeUCS2_IsLinebreak(Py_UNICODE ch); 3123 /// ditto 3124 3125 alias _PyUnicodeUCS2_IsLinebreak _PyUnicode_IsLinebreak; 3126 3127 /// _ 3128 Py_UNICODE _PyUnicodeUCS2_ToLowercase(Py_UNICODE ch); 3129 /// ditto 3130 3131 alias _PyUnicodeUCS2_ToLowercase _PyUnicode_ToLowercase; 3132 3133 /// _ 3134 Py_UNICODE _PyUnicodeUCS2_ToUppercase(Py_UNICODE ch); 3135 /// ditto 3136 3137 alias _PyUnicodeUCS2_ToUppercase _PyUnicode_ToUppercase; 3138 3139 /// _ 3140 Py_UNICODE _PyUnicodeUCS2_ToTitlecase(Py_UNICODE ch); 3141 /// ditto 3142 3143 alias _PyUnicodeUCS2_ToTitlecase _PyUnicode_ToTitlecase; 3144 3145 /// _ 3146 int _PyUnicodeUCS2_ToDecimalDigit(Py_UNICODE ch); 3147 /// ditto 3148 3149 alias _PyUnicodeUCS2_ToDecimalDigit _PyUnicode_ToDecimalDigit; 3150 3151 /// _ 3152 int _PyUnicodeUCS2_ToDigit(Py_UNICODE ch); 3153 /// ditto 3154 3155 alias _PyUnicodeUCS2_ToDigit _PyUnicode_ToDigit; 3156 3157 /// _ 3158 double 
_PyUnicodeUCS2_ToNumeric(Py_UNICODE ch); 3159 /// ditto 3160 3161 alias _PyUnicodeUCS2_ToNumeric _PyUnicode_ToNumeric; 3162 3163 /// _ 3164 int _PyUnicodeUCS2_IsDecimalDigit(Py_UNICODE ch); 3165 /// ditto 3166 3167 alias _PyUnicodeUCS2_IsDecimalDigit _PyUnicode_IsDecimalDigit; 3168 3169 /// _ 3170 int _PyUnicodeUCS2_IsDigit(Py_UNICODE ch); 3171 /// ditto 3172 3173 alias _PyUnicodeUCS2_IsDigit _PyUnicode_IsDigit; 3174 3175 /// _ 3176 int _PyUnicodeUCS2_IsNumeric(Py_UNICODE ch); 3177 /// ditto 3178 3179 alias _PyUnicodeUCS2_IsNumeric _PyUnicode_IsNumeric; 3180 3181 /// _ 3182 int _PyUnicodeUCS2_IsAlpha(Py_UNICODE ch); 3183 /// ditto 3184 3185 alias _PyUnicodeUCS2_IsAlpha _PyUnicode_IsAlpha; 3186 3187 }else{ 3188 3189 version(Python_2_6_Or_Later) { 3190 3191 /** Create a Unicode Object from the Py_UNICODE buffer u of the given 3192 size. 3193 3194 u may be NULL which causes the contents to be undefined. It is the 3195 user's responsibility to fill in the needed data afterwards. Note 3196 that modifying the Unicode object contents after construction is 3197 only allowed if u was set to NULL. 3198 3199 The buffer is copied into the new object. 
*/ 3200 /// Availability: >= 2.6 3201 PyObject* PyUnicodeUCS4_FromUnicode(Py_UNICODE* u, Py_ssize_t size); 3202 /// ditto 3203 3204 alias PyUnicodeUCS4_FromUnicode PyUnicode_FromUnicode; 3205 3206 3207 /** Similar to PyUnicode_FromUnicode(), but u points to Latin-1 encoded bytes */ 3208 /// Availability: >= 2.6 3209 PyObject* PyUnicodeUCS4_FromStringAndSize( 3210 const(char)*u, /* char buffer */ 3211 Py_ssize_t size /* size of buffer */ 3212 ); 3213 /// ditto 3214 3215 alias PyUnicodeUCS4_FromStringAndSize PyUnicode_FromStringAndSize; 3216 3217 3218 /** Similar to PyUnicode_FromUnicode(), but u points to null-terminated 3219 Latin-1 encoded bytes */ 3220 /// Availability: >= 2.6 3221 PyObject* PyUnicodeUCS4_FromString( 3222 const(char)*u /* string */ 3223 ); 3224 /// ditto 3225 3226 alias PyUnicodeUCS4_FromString PyUnicode_FromString; 3227 3228 /// Availability: >= 2.6 3229 PyObject* PyUnicodeUCS4_FromFormatV(const(char)*, va_list); 3230 /// ditto 3231 3232 alias PyUnicodeUCS4_FromFormatV PyUnicode_FromFormatV; 3233 3234 /// Availability: >= 2.6 3235 PyObject* PyUnicodeUCS4_FromFormat(const(char)*, ...); 3236 /// ditto 3237 3238 alias PyUnicodeUCS4_FromFormat PyUnicode_FromFormat; 3239 3240 3241 /** Format the object based on the format_spec, as defined in PEP 3101 3242 (Advanced String Formatting). 
*/ 3243 /// Availability: >= 2.6 3244 PyObject* _PyUnicodeUCS4_FormatAdvanced(PyObject *obj, 3245 Py_UNICODE *format_spec, 3246 Py_ssize_t format_spec_len); 3247 /// ditto 3248 3249 alias _PyUnicodeUCS4_FormatAdvanced _PyUnicode_FormatAdvanced; 3250 3251 /// Availability: >= 2.6 3252 int PyUnicodeUCS4_ClearFreeList(); 3253 /// ditto 3254 3255 alias PyUnicodeUCS4_ClearFreeList PyUnicode_ClearFreeList; 3256 3257 /** 3258 Params: 3259 string = UTF-7 encoded string 3260 length = size of string 3261 errors = error handling 3262 consumed = bytes consumed 3263 */ 3264 /// Availability: >= 2.6 3265 PyObject* PyUnicodeUCS4_DecodeUTF7Stateful( 3266 const(char)* string, 3267 Py_ssize_t length, 3268 const(char)*errors, 3269 Py_ssize_t *consumed 3270 ); 3271 /// ditto 3272 3273 alias PyUnicodeUCS4_DecodeUTF7Stateful PyUnicode_DecodeUTF7Stateful; 3274 3275 /** 3276 Params: 3277 string = UTF-32 encoded string 3278 length = size of string 3279 errors = error handling 3280 byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit 3281 */ 3282 /// Availability: >= 2.6 3283 PyObject* PyUnicodeUCS4_DecodeUTF32( 3284 const(char)* string, 3285 Py_ssize_t length, 3286 const(char)*errors, 3287 int *byteorder 3288 ); 3289 /// ditto 3290 3291 alias PyUnicodeUCS4_DecodeUTF32 PyUnicode_DecodeUTF32; 3292 3293 3294 /** 3295 Params: 3296 string = UTF-32 encoded string 3297 length = size of string 3298 errors = error handling 3299 byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit 3300 consumed = bytes consumed */ 3301 /// Availability: >= 2.6 3302 PyObject* PyUnicodeUCS4_DecodeUTF32Stateful( 3303 const(char)*string, 3304 Py_ssize_t length, 3305 const(char)*errors, 3306 int *byteorder, 3307 Py_ssize_t *consumed 3308 ); 3309 /// ditto 3310 3311 alias PyUnicodeUCS4_DecodeUTF32Stateful PyUnicode_DecodeUTF32Stateful; 3312 3313 /** Returns a Python string using the UTF-32 encoding in native byte 3314 order. The string always starts with a BOM mark.
*/ 3315 /// Availability: >= 2.6 3316 3317 PyObject* PyUnicodeUCS4_AsUTF32String( 3318 PyObject *unicode 3319 ); 3320 /// ditto 3321 3322 alias PyUnicodeUCS4_AsUTF32String PyUnicode_AsUTF32String; 3323 3324 3325 /** Returns a Python string object holding the UTF-32 encoded value of 3326 the Unicode data. 3327 3328 If byteorder is not 0, output is written according to the following 3329 byte order: 3330 3331 byteorder == -1: little endian 3332 byteorder == 0: native byte order (writes a BOM mark) 3333 byteorder == 1: big endian 3334 3335 If byteorder is 0, the output string will always start with the 3336 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is 3337 prepended. 3338 Params: 3339 data = Unicode char buffer 3340 length = number of Py_UNICODE chars to encode 3341 errors = error handling 3342 byteorder = byteorder to use 0=BOM+native;-1=LE,1=BE 3343 3344 */ 3345 /// Availability: >= 2.6 3346 PyObject* PyUnicodeUCS4_EncodeUTF32( 3347 const Py_UNICODE *data, 3348 Py_ssize_t length, 3349 const(char)* errors, 3350 int byteorder 3351 ); 3352 /// ditto 3353 3354 alias PyUnicodeUCS4_EncodeUTF32 PyUnicode_EncodeUTF32; 3355 3356 } 3357 3358 /** Return a read-only pointer to the Unicode object's internal 3359 Py_UNICODE buffer. */ 3360 Py_UNICODE* PyUnicodeUCS4_AsUnicode(PyObject* unicode); 3361 /// ditto 3362 3363 alias PyUnicodeUCS4_AsUnicode PyUnicode_AsUnicode; 3364 3365 /** Get the length of the Unicode object. */ 3366 Py_ssize_t PyUnicodeUCS4_GetSize(PyObject* unicode); 3367 /// ditto 3368 3369 alias PyUnicodeUCS4_GetSize PyUnicode_GetSize; 3370 3371 3372 /** Get the maximum ordinal for a Unicode character. */ 3373 Py_UNICODE PyUnicodeUCS4_GetMax(); 3374 /// ditto 3375 3376 alias PyUnicodeUCS4_GetMax PyUnicode_GetMax; 3377 3378 3379 /** Resize an already allocated Unicode object to the new size length. 3380 3381 _*unicode is modified to point to the new (resized) object and 0 3382 returned on success. 
3383 3384 This API may only be called by the function which also called the 3385 Unicode constructor. The refcount on the object must be 1. Otherwise, 3386 an error is returned. 3387 3388 Error handling is implemented as follows: an exception is set, -1 3389 is returned and *unicode left untouched. 3390 Params: 3391 unicode = pointer to the new unicode object. 3392 length = New length. 3393 3394 */ 3395 int PyUnicodeUCS4_Resize(PyObject** unicode, Py_ssize_t length); 3396 /// ditto 3397 3398 alias PyUnicodeUCS4_Resize PyUnicode_Resize; 3399 3400 /** Coerce obj to a Unicode object and return a reference with 3401 _*incremented* refcount. 3402 3403 Coercion is done in the following way: 3404 3405 1. String and other char buffer compatible objects are decoded 3406 under the assumption that they contain data using the current 3407 default encoding. Decoding is done in "strict" mode. 3408 3409 2. All other objects (including Unicode objects) raise an 3410 exception. 3411 3412 The API returns NULL in case of an error. The caller is responsible 3413 for decref'ing the returned objects. 3414 3415 */ 3416 PyObject* PyUnicodeUCS4_FromEncodedObject( 3417 PyObject* obj, 3418 const(char)* encoding, 3419 const(char)* errors); 3420 /// ditto 3421 3422 alias PyUnicodeUCS4_FromEncodedObject PyUnicode_FromEncodedObject; 3423 3424 3425 /** Coerce obj to a Unicode object and return a reference with 3426 _*incremented* refcount. 3427 3428 Unicode objects are passed back as-is (subclasses are converted to 3429 true Unicode objects), all other objects are delegated to 3430 PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in 3431 using the default encoding as basis for decoding the object. 3432 3433 The API returns NULL in case of an error. The caller is responsible 3434 for decref'ing the returned objects.
 3435 3436 */ 3437 PyObject* PyUnicodeUCS4_FromObject(PyObject* obj); 3438 /// ditto 3439 3440 alias PyUnicodeUCS4_FromObject PyUnicode_FromObject; 3441 3442 3443 /** Create a Unicode Object from the wchar_t buffer w of the given 3444 size. 3445 3446 The buffer is copied into the new object. */ 3447 PyObject* PyUnicodeUCS4_FromWideChar(const(wchar)* w, Py_ssize_t size); 3448 /// ditto 3449 3450 alias PyUnicodeUCS4_FromWideChar PyUnicode_FromWideChar; 3451 3452 3453 /** Copies the Unicode Object contents into the wchar_t buffer w. At 3454 most size wchar_t characters are copied. 3455 3456 Note that the resulting wchar_t string may or may not be 3457 0-terminated. It is the responsibility of the caller to make sure 3458 that the wchar_t string is 0-terminated in case this is required by 3459 the application. 3460 3461 Returns the number of wchar_t characters copied (excluding a 3462 possibly trailing 0-termination character) or -1 in case of an 3463 error. NOTE(review): w is declared const(wchar)* here although this function is documented to write into the buffer, and D's wchar is a 16-bit code unit that may not match the platform's C wchar_t - confirm against the C prototype before relying on this signature. */ 3464 Py_ssize_t PyUnicodeUCS4_AsWideChar( 3465 PyUnicodeObject* unicode, 3466 const(wchar)* w, 3467 Py_ssize_t size); 3468 /// ditto 3469 3470 alias PyUnicodeUCS4_AsWideChar PyUnicode_AsWideChar; 3471 3472 3473 /** Create a Unicode Object from the given Unicode code point ordinal. 3474 3475 The ordinal must be in range(0x10000) on narrow Python builds 3476 (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is 3477 raised in case it is not. 3478 3479 */ 3480 PyObject* PyUnicodeUCS4_FromOrdinal(int ordinal); 3481 /// ditto 3482 3483 alias PyUnicodeUCS4_FromOrdinal PyUnicode_FromOrdinal; 3484 3485 3486 /** Return a Python string holding the default encoded value of the 3487 Unicode object. 3488 3489 The resulting string is cached in the Unicode object for subsequent 3490 usage by this function. The cached version is needed to implement 3491 the character buffer interface and will live (at least) as long as 3492 the Unicode object itself. 3493 3494 The refcount of the string is *not* incremented. 
3495 3496 _*** Exported for internal use by the interpreter only !!! *** 3497 3498 */ 3499 PyObject* _PyUnicodeUCS4_AsDefaultEncodedString(PyObject *, const(char)*); 3500 /// ditto 3501 3502 alias _PyUnicodeUCS4_AsDefaultEncodedString _PyUnicode_AsDefaultEncodedString; 3503 3504 3505 /** Returns the currently active default encoding. 3506 3507 The default encoding is currently implemented as run-time settable 3508 process global. This may change in future versions of the 3509 interpreter to become a parameter which is managed on a per-thread 3510 basis. 3511 3512 */ 3513 const(char)* PyUnicodeUCS4_GetDefaultEncoding(); 3514 /// ditto 3515 3516 alias PyUnicodeUCS4_GetDefaultEncoding PyUnicode_GetDefaultEncoding; 3517 3518 3519 /** Sets the currently active default encoding. 3520 3521 Returns 0 on success, -1 in case of an error. 3522 3523 */ 3524 int PyUnicodeUCS4_SetDefaultEncoding(const(char)*encoding); 3525 /// ditto 3526 3527 alias PyUnicodeUCS4_SetDefaultEncoding PyUnicode_SetDefaultEncoding; 3528 3529 3530 /** Create a Unicode object by decoding the encoded string s of the 3531 given size. 3532 Params: 3533 s = encoded string 3534 size = size of buffer 3535 encoding = encoding 3536 errors = error handling 3537 */ 3538 PyObject* PyUnicodeUCS4_Decode( 3539 const(char)* s, 3540 Py_ssize_t size, 3541 const(char)* encoding, 3542 const(char)* errors); 3543 /// ditto 3544 3545 alias PyUnicodeUCS4_Decode PyUnicode_Decode; 3546 3547 3548 version(Python_3_0_Or_Later) { 3549 /** Decode a Unicode object unicode and return the result as Python 3550 object. */ 3551 /// Availability: 3.* 3552 3553 PyObject* PyUnicodeUCS4_AsDecodedObject( 3554 PyObject* unicode, 3555 const(char)* encoding, 3556 const(char)* errors 3557 ); 3558 /// ditto 3559 3560 alias PyUnicodeUCS4_AsDecodedObject PyUnicode_AsDecodedObject; 3561 3562 /** Decode a Unicode object unicode and return the result as Unicode 3563 object. 
*/ 3564 /// Availability: 3.* 3565 3566 PyObject* PyUnicodeUCS4_AsDecodedUnicode( 3567 PyObject* unicode, 3568 const(char)* encoding, 3569 const(char)* errors 3570 ); 3571 /// ditto 3572 3573 alias PyUnicodeUCS4_AsDecodedUnicode PyUnicode_AsDecodedUnicode; 3574 3575 } 3576 3577 /** Encodes a Py_UNICODE buffer of the given size and returns a 3578 Python string object. 3579 Params: 3580 s = Unicode char buffer 3581 size = number of Py_UNICODE chars to encode 3582 encoding = encoding 3583 errors = error handling 3584 */ 3585 PyObject* PyUnicodeUCS4_Encode( 3586 Py_UNICODE* s, 3587 Py_ssize_t size, 3588 const(char)* encoding, 3589 const(char)* errors); 3590 /// ditto 3591 3592 alias PyUnicodeUCS4_Encode PyUnicode_Encode; 3593 3594 3595 /** Encodes a Unicode object and returns the result as Python object. 3596 */ 3597 PyObject* PyUnicodeUCS4_AsEncodedObject( 3598 PyObject* unicode, 3599 const(char)* encoding, 3600 const(char)* errors); 3601 /// ditto 3602 3603 alias PyUnicodeUCS4_AsEncodedObject PyUnicode_AsEncodedObject; 3604 3605 3606 /** Encodes a Unicode object and returns the result as Python string 3607 object. */ 3608 PyObject* PyUnicodeUCS4_AsEncodedString( 3609 PyObject* unicode, 3610 const(char)* encoding, 3611 const(char)* errors); 3612 /// ditto 3613 3614 alias PyUnicodeUCS4_AsEncodedString PyUnicode_AsEncodedString; 3615 3616 3617 version(Python_3_0_Or_Later) { 3618 /** Encodes a Unicode object and returns the result as Unicode 3619 object. 
 */ 3620 /// Availability: 3.* 3621 PyObject* PyUnicodeUCS4_AsEncodedUnicode( 3622 PyObject* unicode, 3623 const(char)* encoding, 3624 const(char)* errors 3625 ); 3626 /// ditto 3627 3628 alias PyUnicodeUCS4_AsEncodedUnicode PyUnicode_AsEncodedUnicode; 3629 3630 } 3631 3632 /** 3633 Params: 3634 string = UTF-7 encoded string 3635 length = size of string 3636 errors = error handling 3637 */ 3638 PyObject* PyUnicodeUCS4_DecodeUTF7( 3639 const(char)* string, 3640 Py_ssize_t length, 3641 const(char)* errors); 3642 /// ditto 3643 3644 alias PyUnicodeUCS4_DecodeUTF7 PyUnicode_DecodeUTF7; 3645 3646 3647 /** 3648 Params: 3649 data = Unicode char buffer 3650 length = number of Py_UNICODE chars to encode 3651 encodeSetO = Encode RFC2152 Set O characters in base64 3652 encodeWhiteSpace = Encode whitespace (sp, ht, nl, cr) in base64 3653 errors = error handling 3654 */ 3655 PyObject* PyUnicodeUCS4_EncodeUTF7( 3656 Py_UNICODE* data, 3657 Py_ssize_t length, 3658 int encodeSetO, 3659 int encodeWhiteSpace, 3660 const(char)* errors 3661 ); 3662 /// ditto 3663 3664 alias PyUnicodeUCS4_EncodeUTF7 PyUnicode_EncodeUTF7; 3665 3666 3667 /// _ 3668 PyObject* PyUnicodeUCS4_DecodeUTF8( 3669 const(char)* string, 3670 Py_ssize_t length, 3671 const(char)* errors); 3672 /// ditto 3673 3674 alias PyUnicodeUCS4_DecodeUTF8 PyUnicode_DecodeUTF8; 3675 3676 /// _ 3677 PyObject* PyUnicodeUCS4_DecodeUTF8Stateful( 3678 const(char)* string, 3679 Py_ssize_t length, 3680 const(char)* errors, 3681 Py_ssize_t* consumed 3682 ); 3683 /// ditto 3684 3685 alias PyUnicodeUCS4_DecodeUTF8Stateful PyUnicode_DecodeUTF8Stateful; 3686 3687 /// _ 3688 PyObject* PyUnicodeUCS4_AsUTF8String(PyObject* unicode); 3689 /// ditto 3690 3691 alias PyUnicodeUCS4_AsUTF8String PyUnicode_AsUTF8String; 3692 3693 /// _ 3694 PyObject* PyUnicodeUCS4_EncodeUTF8( 3695 Py_UNICODE* data, 3696 Py_ssize_t length, 3697 const(char) *errors); 3698 /// ditto 3699 3700 alias PyUnicodeUCS4_EncodeUTF8 PyUnicode_EncodeUTF8; 3701 3702 3703 /** 
Decodes length bytes from a UTF-16 encoded buffer string and returns 3704 the corresponding Unicode object. 3705 3706 errors (if non-NULL) defines the error handling. It defaults 3707 to "strict". 3708 3709 If byteorder is non-NULL, the decoder starts decoding using the 3710 given byte order: 3711 3712 *byteorder == -1: little endian 3713 *byteorder == 0: native order 3714 *byteorder == 1: big endian 3715 3716 In native mode, the first two bytes of the stream are checked for a 3717 BOM mark. If found, the BOM mark is analysed, the byte order 3718 adjusted and the BOM skipped. In the other modes, no BOM mark 3719 interpretation is done. After completion, *byteorder is set to the 3720 current byte order at the end of input data. 3721 3722 If byteorder is NULL, the codec starts in native order mode. 3723 3724 */ 3725 PyObject* PyUnicodeUCS4_DecodeUTF16( 3726 const(char)* string, 3727 Py_ssize_t length, 3728 const(char)* errors, 3729 int* byteorder); 3730 /// ditto 3731 3732 alias PyUnicodeUCS4_DecodeUTF16 PyUnicode_DecodeUTF16; 3733 3734 /** 3735 Params: 3736 string = UTF-16 encoded string 3737 length = size of string 3738 errors = error handling 3739 byteorder = pointer to byteorder to use 0=native;-1=LE,1=BE; updated on exit 3740 consumed = bytes consumed 3741 */ 3742 PyObject* PyUnicodeUCS4_DecodeUTF16Stateful( 3743 const(char)* string, 3744 Py_ssize_t length, 3745 const(char)* errors, 3746 int* byteorder, 3747 Py_ssize_t* consumed 3748 ); 3749 /// ditto 3750 3751 alias PyUnicodeUCS4_DecodeUTF16Stateful PyUnicode_DecodeUTF16Stateful; 3752 3753 /** Returns a Python string using the UTF-16 encoding in native byte 3754 order. The string always starts with a BOM mark. */ 3755 PyObject* PyUnicodeUCS4_AsUTF16String(PyObject *unicode); 3756 /// ditto 3757 3758 alias PyUnicodeUCS4_AsUTF16String PyUnicode_AsUTF16String; 3759 3760 /** Returns a Python string object holding the UTF-16 encoded value of 3761 the Unicode data. 
3762 3763 If byteorder is not 0, output is written according to the following 3764 byte order: 3765 3766 byteorder == -1: little endian 3767 byteorder == 0: native byte order (writes a BOM mark) 3768 byteorder == 1: big endian 3769 3770 If byteorder is 0, the output string will always start with the 3771 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is 3772 prepended. 3773 3774 Note that Py_UNICODE data is being interpreted as UTF-16 reduced to 3775 UCS-2. This trick makes it possible to add full UTF-16 capabilities 3776 at a later point without compromising the APIs. 3777 3778 */ 3779 PyObject* PyUnicodeUCS4_EncodeUTF16( 3780 Py_UNICODE* data, 3781 Py_ssize_t length, 3782 const(char)* errors, 3783 int byteorder 3784 ); 3785 /// ditto 3786 3787 alias PyUnicodeUCS4_EncodeUTF16 PyUnicode_EncodeUTF16; 3788 3789 3790 /// _ 3791 PyObject* PyUnicodeUCS4_DecodeUnicodeEscape( 3792 const(char)* string, 3793 Py_ssize_t length, 3794 const(char)* errors); 3795 /// ditto 3796 3797 alias PyUnicodeUCS4_DecodeUnicodeEscape PyUnicode_DecodeUnicodeEscape; 3798 3799 /// _ 3800 PyObject* PyUnicodeUCS4_AsUnicodeEscapeString( 3801 PyObject* unicode); 3802 /// ditto 3803 3804 alias PyUnicodeUCS4_AsUnicodeEscapeString PyUnicode_AsUnicodeEscapeString; 3805 3806 /// _ 3807 PyObject* PyUnicodeUCS4_EncodeUnicodeEscape( 3808 Py_UNICODE* data, 3809 Py_ssize_t length); 3810 /// ditto 3811 3812 alias PyUnicodeUCS4_EncodeUnicodeEscape PyUnicode_EncodeUnicodeEscape; 3813 3814 /** 3815 Params: 3816 string = Raw-Unicode-Escape encoded string 3817 length = size of string 3818 errors = error handling 3819 */ 3820 PyObject* PyUnicodeUCS4_DecodeRawUnicodeEscape( 3821 const(char)* string, 3822 Py_ssize_t length, 3823 const(char)* errors); 3824 /// ditto 3825 3826 alias PyUnicodeUCS4_DecodeRawUnicodeEscape PyUnicode_DecodeRawUnicodeEscape; 3827 3828 /// _ 3829 PyObject* PyUnicodeUCS4_AsRawUnicodeEscapeString(PyObject* unicode); 3830 /// ditto 3831 3832 alias 
 PyUnicodeUCS4_AsRawUnicodeEscapeString PyUnicode_AsRawUnicodeEscapeString; 3833 3834 /// _ 3835 PyObject* PyUnicodeUCS4_EncodeRawUnicodeEscape( 3836 Py_UNICODE* data, Py_ssize_t length); 3837 /// ditto 3838 3839 alias PyUnicodeUCS4_EncodeRawUnicodeEscape PyUnicode_EncodeRawUnicodeEscape; 3840 3841 3842 /// _ 3843 PyObject* _PyUnicodeUCS4_DecodeUnicodeInternal( 3844 const(char)* string, 3845 Py_ssize_t length, 3846 const(char)* errors); 3847 /// ditto 3848 3849 alias _PyUnicodeUCS4_DecodeUnicodeInternal _PyUnicode_DecodeUnicodeInternal; 3850 3851 3852 /** 3853 Params: 3854 string = Latin-1 encoded string 3855 length = size of string 3856 errors = error handling 3857 */ 3858 PyObject* PyUnicodeUCS4_DecodeLatin1( 3859 const(char)* string, 3860 Py_ssize_t length, 3861 const(char)* errors); 3862 /// ditto 3863 3864 alias PyUnicodeUCS4_DecodeLatin1 PyUnicode_DecodeLatin1; 3865 3866 /// _ 3867 PyObject* PyUnicodeUCS4_AsLatin1String(PyObject *unicode); 3868 /// ditto 3869 3870 alias PyUnicodeUCS4_AsLatin1String PyUnicode_AsLatin1String; 3871 3872 /** 3873 Params: 3874 data = Unicode char buffer 3875 length = Number of Py_UNICODE chars to encode 3876 errors = error handling 3877 */ 3878 PyObject* PyUnicodeUCS4_EncodeLatin1( 3879 Py_UNICODE* data, 3880 Py_ssize_t length, 3881 const(char)* errors); 3882 /// ditto 3883 3884 alias PyUnicodeUCS4_EncodeLatin1 PyUnicode_EncodeLatin1; 3885 3886 3887 /** 3888 Params: 3889 string = ASCII encoded string 3890 length = size of string 3891 errors = error handling 3892 */ 3893 PyObject* PyUnicodeUCS4_DecodeASCII( 3894 const(char)* string, 3895 Py_ssize_t length, 3896 const(char)* errors); 3897 /// ditto 3898 3899 alias PyUnicodeUCS4_DecodeASCII PyUnicode_DecodeASCII; 3900 3901 /// _ 3902 PyObject* PyUnicodeUCS4_AsASCIIString(PyObject *unicode); 3903 /// ditto 3904 3905 alias PyUnicodeUCS4_AsASCIIString PyUnicode_AsASCIIString; 3906 3907 /** 3908 Params: 3909 data = Unicode char buffer 3910 length = Number of Py_UNICODE
chars to encode 3911 errors = error handling 3912 */ 3913 PyObject* PyUnicodeUCS4_EncodeASCII( 3914 Py_UNICODE* data, 3915 Py_ssize_t length, 3916 const(char)* errors); 3917 /// ditto 3918 3919 alias PyUnicodeUCS4_EncodeASCII PyUnicode_EncodeASCII; 3920 3921 3922 /** 3923 Params: 3924 string = Encoded string 3925 length = size of string 3926 mapping = character mapping (char ordinal -> unicode ordinal) 3927 errors = error handling 3928 */ 3929 PyObject* PyUnicodeUCS4_DecodeCharmap( 3930 const(char)* string, 3931 Py_ssize_t length, 3932 PyObject* mapping, 3933 const(char)* errors 3934 ); 3935 /// ditto 3936 3937 alias PyUnicodeUCS4_DecodeCharmap PyUnicode_DecodeCharmap; 3938 3939 /** 3940 Params: 3941 unicode = Unicode object 3942 mapping = character mapping (unicode ordinal -> char ordinal) 3943 */ 3944 PyObject* PyUnicodeUCS4_AsCharmapString( 3945 PyObject* unicode, 3946 PyObject* mapping); 3947 /// ditto 3948 3949 alias PyUnicodeUCS4_AsCharmapString PyUnicode_AsCharmapString; 3950 3951 /** 3952 Params: 3953 data = Unicode char buffer 3954 length = Number of Py_UNICODE chars to encode 3955 mapping = character mapping (unicode ordinal -> char ordinal) 3956 errors = error handling 3957 */ 3958 PyObject* PyUnicodeUCS4_EncodeCharmap( 3959 Py_UNICODE* data, 3960 Py_ssize_t length, 3961 PyObject* mapping, 3962 const(char)* errors 3963 ); 3964 /// ditto 3965 3966 alias PyUnicodeUCS4_EncodeCharmap PyUnicode_EncodeCharmap; 3967 3968 /** Translate a Py_UNICODE buffer of the given length by applying a 3969 character mapping table to it and return the resulting Unicode 3970 object. 3971 3972 The mapping table must map Unicode ordinal integers to Unicode 3973 ordinal integers or None (causing deletion of the character). 3974 3975 Mapping tables may be dictionaries or sequences. Unmapped character 3976 ordinals (ones which cause a LookupError) are left untouched and 3977 are copied as-is. 
3978 3979 */ 3980 PyObject* PyUnicodeUCS4_TranslateCharmap( 3981 Py_UNICODE* data, 3982 Py_ssize_t length, 3983 PyObject* table, 3984 const(char)* errors 3985 ); 3986 /// ditto 3987 3988 alias PyUnicodeUCS4_TranslateCharmap PyUnicode_TranslateCharmap; 3989 3990 3991 version (Windows) { 3992 /// Availability: Windows only 3993 PyObject* PyUnicodeUCS4_DecodeMBCS( 3994 const(char)* string, 3995 Py_ssize_t length, 3996 const(char)* errors); 3997 /// ditto 3998 3999 alias PyUnicodeUCS4_DecodeMBCS PyUnicode_DecodeMBCS; 4000 4001 /// Availability: Windows only 4002 PyObject* PyUnicodeUCS4_AsMBCSString(PyObject* unicode); 4003 /// ditto 4004 4005 alias PyUnicodeUCS4_AsMBCSString PyUnicode_AsMBCSString; 4006 4007 /// Availability: Windows only 4008 PyObject* PyUnicodeUCS4_EncodeMBCS( 4009 Py_UNICODE* data, 4010 Py_ssize_t length, 4011 const(char)* errors); 4012 /// ditto 4013 4014 alias PyUnicodeUCS4_EncodeMBCS PyUnicode_EncodeMBCS; 4015 4016 } 4017 /** Takes a Unicode string holding a decimal value and writes it into 4018 an output buffer using standard ASCII digit codes. 4019 4020 The output buffer has to provide at least length+1 bytes of storage 4021 area. The output string is 0-terminated. 4022 4023 The encoder converts whitespace to ' ', decimal characters to their 4024 corresponding ASCII digit and all other Latin-1 characters except 4025 \0 as-is. Characters outside this range (Unicode ordinals 1-256) 4026 are treated as errors. This includes embedded NULL bytes. 4027 4028 Error handling is defined by the errors argument: 4029 4030 NULL or "strict": raise a ValueError 4031 "ignore": ignore the wrong characters (these are not copied to the 4032 output buffer) 4033 "replace": replaces illegal characters with '?' 4034 4035 Returns 0 on success, -1 on failure. 
4036 4037 */ 4038 int PyUnicodeUCS4_EncodeDecimal( 4039 Py_UNICODE* s, 4040 Py_ssize_t length, 4041 char* output, 4042 const(char)* errors); 4043 /// ditto 4044 4045 alias PyUnicodeUCS4_EncodeDecimal PyUnicode_EncodeDecimal; 4046 4047 4048 /** Concat two strings giving a new Unicode string. */ 4049 PyObject* PyUnicodeUCS4_Concat( 4050 PyObject* left, 4051 PyObject* right); 4052 /// ditto 4053 4054 alias PyUnicodeUCS4_Concat PyUnicode_Concat; 4055 4056 4057 version(Python_3_0_Or_Later) { 4058 /** Concat two strings and put the result in *pleft 4059 (sets *pleft to NULL on error) 4060 Params: 4061 pleft = Pointer to left string 4062 right = Right string 4063 */ 4064 /// Availability: 3.* 4065 4066 void PyUnicodeUCS4_Append( 4067 PyObject** pleft, 4068 PyObject* right 4069 ); 4070 /// ditto 4071 4072 alias PyUnicodeUCS4_Append PyUnicode_Append; 4073 4074 4075 /** Concat two strings, put the result in *pleft and drop the right object 4076 (sets *pleft to NULL on error) 4077 Params: 4078 pleft = Pointer to left string 4079 */ 4080 /// Availability: 3.* 4081 void PyUnicodeUCS4_AppendAndDel( 4082 PyObject** pleft, 4083 PyObject* right 4084 ); 4085 /// ditto 4086 4087 alias PyUnicodeUCS4_AppendAndDel PyUnicode_AppendAndDel; 4088 4089 } 4090 4091 /** Split a string giving a list of Unicode strings. 4092 4093 If sep is NULL, splitting will be done at all whitespace 4094 substrings. Otherwise, splits occur at the given separator. 4095 4096 At most maxsplit splits will be done. If negative, no limit is set. 4097 4098 Separators are not included in the resulting list. 4099 4100 */ 4101 PyObject* PyUnicodeUCS4_Split( 4102 PyObject* s, 4103 PyObject* sep, 4104 Py_ssize_t maxsplit); 4105 /// ditto 4106 4107 alias PyUnicodeUCS4_Split PyUnicode_Split; 4108 4109 4110 /** Ditto PyUnicode_Split, but split at line breaks. 4111 4112 CRLF is considered to be one line break. Line breaks are not 4113 included in the resulting list. 
*/ 4114 PyObject* PyUnicodeUCS4_Splitlines( 4115 PyObject* s, 4116 int keepends); 4117 /// ditto 4118 4119 alias PyUnicodeUCS4_Splitlines PyUnicode_Splitlines; 4120 4121 4122 version(Python_2_5_Or_Later) { 4123 /** Partition a string using a given separator. */ 4124 /// Availability: >= 2.5 4125 PyObject* PyUnicodeUCS4_Partition( 4126 PyObject* s, 4127 PyObject* sep 4128 ); 4129 /// ditto 4130 4131 alias PyUnicodeUCS4_Partition PyUnicode_Partition; 4132 4133 4134 /** Partition a string using a given separator, searching from the end 4135 of the string. */ 4136 4137 PyObject* PyUnicodeUCS4_RPartition( 4138 PyObject* s, 4139 PyObject* sep 4140 ); 4141 /// ditto 4142 4143 alias PyUnicodeUCS4_RPartition PyUnicode_RPartition; 4144 4145 } 4146 4147 /** Split a string giving a list of Unicode strings. 4148 4149 If sep is NULL, splitting will be done at all whitespace 4150 substrings. Otherwise, splits occur at the given separator. 4151 4152 At most maxsplit splits will be done. But unlike PyUnicode_Split 4153 PyUnicode_RSplit splits from the end of the string. If negative, 4154 no limit is set. 4155 4156 Separators are not included in the resulting list. 4157 4158 */ 4159 PyObject* PyUnicodeUCS4_RSplit( 4160 PyObject* s, 4161 PyObject* sep, 4162 Py_ssize_t maxsplit); 4163 /// ditto 4164 4165 alias PyUnicodeUCS4_RSplit PyUnicode_RSplit; 4166 4167 4168 /** Translate a string by applying a character mapping table to it and 4169 return the resulting Unicode object. 4170 4171 The mapping table must map Unicode ordinal integers to Unicode 4172 ordinal integers or None (causing deletion of the character). 4173 4174 Mapping tables may be dictionaries or sequences. Unmapped character 4175 ordinals (ones which cause a LookupError) are left untouched and 4176 are copied as-is. 
4177 4178 */ 4179 PyObject* PyUnicodeUCS4_Translate( 4180 PyObject* str, 4181 PyObject* table, 4182 const(char)* errors); 4183 /// ditto 4184 4185 alias PyUnicodeUCS4_Translate PyUnicode_Translate; 4186 4187 4188 /** Join a sequence of strings using the given separator and return 4189 the resulting Unicode string. */ 4190 PyObject* PyUnicodeUCS4_Join( 4191 PyObject* separator, 4192 PyObject* seq); 4193 /// ditto 4194 4195 alias PyUnicodeUCS4_Join PyUnicode_Join; 4196 4197 4198 /** Return 1 if substr matches str[start:end] at the given tail end, 0 4199 otherwise. */ 4200 Py_ssize_t PyUnicodeUCS4_Tailmatch( 4201 PyObject* str, 4202 PyObject* substr, 4203 Py_ssize_t start, 4204 Py_ssize_t end, 4205 int direction 4206 ); 4207 /// ditto 4208 4209 alias PyUnicodeUCS4_Tailmatch PyUnicode_Tailmatch; 4210 4211 4212 /** Return the first position of substr in str[start:end] using the 4213 given search direction or -1 if not found. -2 is returned in case 4214 an error occurred and an exception is set. */ 4215 Py_ssize_t PyUnicodeUCS4_Find( 4216 PyObject* str, 4217 PyObject* substr, 4218 Py_ssize_t start, 4219 Py_ssize_t end, 4220 int direction 4221 ); 4222 /// ditto 4223 4224 alias PyUnicodeUCS4_Find PyUnicode_Find; 4225 4226 4227 /** Count the number of occurrences of substr in str[start:end]. */ 4228 Py_ssize_t PyUnicodeUCS4_Count( 4229 PyObject* str, 4230 PyObject* substr, 4231 Py_ssize_t start, 4232 Py_ssize_t end); 4233 /// ditto 4234 4235 alias PyUnicodeUCS4_Count PyUnicode_Count; 4236 4237 4238 /** Replace at most maxcount occurrences of substr in str with replstr 4239 and return the resulting Unicode object. */ 4240 PyObject* PyUnicodeUCS4_Replace( 4241 PyObject* str, 4242 PyObject* substr, 4243 PyObject* replstr, 4244 Py_ssize_t maxcount 4245 ); 4246 /// ditto 4247 4248 alias PyUnicodeUCS4_Replace PyUnicode_Replace; 4249 4250 4251 /** Compare two strings and return -1, 0, 1 for less than, equal, 4252 greater than resp. 
 */ 4253 int PyUnicodeUCS4_Compare(PyObject* left, PyObject* right); 4254 /// ditto 4255 4256 alias PyUnicodeUCS4_Compare PyUnicode_Compare; 4257 4258 version(Python_3_0_Or_Later) { 4259 /** Compare two strings and return -1, 0, 1 for less than, equal, 4260 greater than resp. 4261 Params: 4262 left = Python object holding the first string 4263 right = ASCII-encoded string 4264 */ 4265 /// Availability: 3.* 4266 int PyUnicodeUCS4_CompareWithASCIIString( 4267 PyObject* left, 4268 const(char)* right 4269 ); 4270 /// ditto 4271 4272 alias PyUnicodeUCS4_CompareWithASCIIString PyUnicode_CompareWithASCIIString; 4273 4274 } 4275 4276 version(Python_2_5_Or_Later) { 4277 /** Rich compare two strings and return one of the following: 4278 4279 - NULL in case an exception was raised 4280 - Py_True or Py_False for successful comparisons 4281 - Py_NotImplemented in case the type combination is unknown 4282 4283 Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in 4284 case the conversion of the arguments to Unicode fails with a 4285 UnicodeDecodeError. 4286 4287 Possible values for op: 4288 4289 Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE 4290 4291 */ 4292 /// Availability: >= 2.5 4293 PyObject* PyUnicodeUCS4_RichCompare( 4294 PyObject* left, 4295 PyObject* right, 4296 int op 4297 ); 4298 /// ditto 4299 4300 alias PyUnicodeUCS4_RichCompare PyUnicode_RichCompare; 4301 4302 } 4303 4304 /** Apply an argument tuple or dictionary to a format string and return 4305 the resulting Unicode string. */ 4306 PyObject* PyUnicodeUCS4_Format(PyObject* format, PyObject* args); 4307 /// ditto 4308 4309 alias PyUnicodeUCS4_Format PyUnicode_Format; 4310 4311 4312 /** Checks whether element is contained in container and return 1/0 4313 accordingly. 4314 4315 element has to coerce to a one-element Unicode string. -1 is 4316 returned in case of an error. 
*/ 4317 int PyUnicodeUCS4_Contains(PyObject* container, PyObject* element); 4318 /// ditto 4319 4320 alias PyUnicodeUCS4_Contains PyUnicode_Contains; 4321 4322 4323 version(Python_3_0_Or_Later) { 4324 /** Checks whether argument is a valid identifier. */ 4325 /// Availability: 3.* 4326 int PyUnicodeUCS4_IsIdentifier(PyObject* s); 4327 /// ditto 4328 4329 alias PyUnicodeUCS4_IsIdentifier PyUnicode_IsIdentifier; 4330 4331 } 4332 4333 4334 /// _ 4335 int _PyUnicodeUCS4_IsLowercase(Py_UNICODE ch); 4336 /// ditto 4337 4338 alias _PyUnicodeUCS4_IsLowercase _PyUnicode_IsLowercase; 4339 4340 /// _ 4341 int _PyUnicodeUCS4_IsUppercase(Py_UNICODE ch); 4342 /// ditto 4343 4344 alias _PyUnicodeUCS4_IsUppercase _PyUnicode_IsUppercase; 4345 4346 /// _ 4347 int _PyUnicodeUCS4_IsTitlecase(Py_UNICODE ch); 4348 /// ditto 4349 4350 alias _PyUnicodeUCS4_IsTitlecase _PyUnicode_IsTitlecase; 4351 4352 /// _ 4353 int _PyUnicodeUCS4_IsWhitespace(Py_UNICODE ch); 4354 /// ditto 4355 4356 alias _PyUnicodeUCS4_IsWhitespace _PyUnicode_IsWhitespace; 4357 4358 /// _ 4359 int _PyUnicodeUCS4_IsLinebreak(Py_UNICODE ch); 4360 /// ditto 4361 4362 alias _PyUnicodeUCS4_IsLinebreak _PyUnicode_IsLinebreak; 4363 4364 /// _ 4365 Py_UNICODE _PyUnicodeUCS4_ToLowercase(Py_UNICODE ch); 4366 /// ditto 4367 4368 alias _PyUnicodeUCS4_ToLowercase _PyUnicode_ToLowercase; 4369 4370 /// _ 4371 Py_UNICODE _PyUnicodeUCS4_ToUppercase(Py_UNICODE ch); 4372 /// ditto 4373 4374 alias _PyUnicodeUCS4_ToUppercase _PyUnicode_ToUppercase; 4375 4376 /// _ 4377 Py_UNICODE _PyUnicodeUCS4_ToTitlecase(Py_UNICODE ch); 4378 /// ditto 4379 4380 alias _PyUnicodeUCS4_ToTitlecase _PyUnicode_ToTitlecase; 4381 4382 /// _ 4383 int _PyUnicodeUCS4_ToDecimalDigit(Py_UNICODE ch); 4384 /// ditto 4385 4386 alias _PyUnicodeUCS4_ToDecimalDigit _PyUnicode_ToDecimalDigit; 4387 4388 /// _ 4389 int _PyUnicodeUCS4_ToDigit(Py_UNICODE ch); 4390 /// ditto 4391 4392 alias _PyUnicodeUCS4_ToDigit _PyUnicode_ToDigit; 4393 4394 /// _ 4395 double 
_PyUnicodeUCS4_ToNumeric(Py_UNICODE ch); 4396 /// ditto 4397 4398 alias _PyUnicodeUCS4_ToNumeric _PyUnicode_ToNumeric; 4399 4400 /// _ 4401 int _PyUnicodeUCS4_IsDecimalDigit(Py_UNICODE ch); 4402 /// ditto 4403 4404 alias _PyUnicodeUCS4_IsDecimalDigit _PyUnicode_IsDecimalDigit; 4405 4406 /// _ 4407 int _PyUnicodeUCS4_IsDigit(Py_UNICODE ch); 4408 /// ditto 4409 4410 alias _PyUnicodeUCS4_IsDigit _PyUnicode_IsDigit; 4411 4412 /// _ 4413 int _PyUnicodeUCS4_IsNumeric(Py_UNICODE ch); 4414 /// ditto 4415 4416 alias _PyUnicodeUCS4_IsNumeric _PyUnicode_IsNumeric; 4417 4418 /// _ 4419 int _PyUnicodeUCS4_IsAlpha(Py_UNICODE ch); 4420 /// ditto 4421 4422 alias _PyUnicodeUCS4_IsAlpha _PyUnicode_IsAlpha; 4423 4424 } 4425 version(Python_3_0_Or_Later) { 4426 /// Availability: 3.* 4427 size_t Py_UNICODE_strlen(const(Py_UNICODE)* u); 4428 4429 /// Availability: 3.* 4430 Py_UNICODE* Py_UNICODE_strcpy(Py_UNICODE* s1, const(Py_UNICODE)* s2); 4431 4432 version(Python_3_2_Or_Later) { 4433 /// Availability: >= 3.2 4434 Py_UNICODE* Py_UNICODE_strcat(Py_UNICODE* s1, const(Py_UNICODE)* s2); 4435 } 4436 4437 /// Availability: 3.* 4438 Py_UNICODE* Py_UNICODE_strncpy( 4439 Py_UNICODE* s1, 4440 const(Py_UNICODE)* s2, 4441 size_t n); 4442 4443 /// Availability: 3.* 4444 int Py_UNICODE_strcmp( 4445 const(Py_UNICODE)* s1, 4446 const(Py_UNICODE)* s2 4447 ); 4448 4449 version(Python_3_2_Or_Later) { 4450 /// Availability: >= 3.2 4451 int Py_UNICODE_strncmp( 4452 const(Py_UNICODE)* s1, 4453 const(Py_UNICODE)* s2, 4454 size_t n 4455 ); 4456 } 4457 4458 /// Availability: 3.* 4459 Py_UNICODE* Py_UNICODE_strchr( 4460 const(Py_UNICODE)* s, 4461 Py_UNICODE c 4462 ); 4463 4464 version(Python_3_2_Or_Later) { 4465 /// Availability: >= 3.2 4466 Py_UNICODE* Py_UNICODE_strrchr( 4467 const(Py_UNICODE)* s, 4468 Py_UNICODE c 4469 ); 4470 } 4471 4472 version(Python_3_2_Or_Later) { 4473 /** Create a copy of a unicode string ending with a nul character. 
Return NULL 4474 and raise a MemoryError exception on memory allocation failure, otherwise 4475 return a new allocated buffer (use PyMem_Free() to free the buffer). */ 4476 /// Availability: >= 3.2 4477 4478 Py_UNICODE* PyUnicode_AsUnicodeCopy( 4479 PyObject* unicode 4480 ); 4481 } 4482 } 4483 4484 4485 /// _ 4486 int _PyUnicode_IsTitlecase( 4487 Py_UCS4 ch /* Unicode character */ 4488 ); 4489 4490 /// _ 4491 int _PyUnicode_IsXidStart( 4492 Py_UCS4 ch /* Unicode character */ 4493 ); 4494 /** Externally visible for str.strip(unicode) */ 4495 PyObject* _PyUnicode_XStrip(PyUnicodeObject* self, int striptype, 4496 PyObject *sepobj 4497 ); 4498 version(Python_3_0_Or_Later) { 4499 version(Python_3_2_Or_Later) { 4500 /** Using the current locale, insert the thousands grouping 4501 into the string pointed to by buffer. For the argument descriptions, 4502 see Objects/stringlib/localeutil.h */ 4503 /// Availability: >= 3.2 4504 Py_ssize_t _PyUnicode_InsertThousandsGroupingLocale( 4505 Py_UNICODE* buffer, 4506 Py_ssize_t n_buffer, 4507 Py_UNICODE* digits, 4508 Py_ssize_t n_digits, 4509 Py_ssize_t min_width); 4510 } 4511 4512 /** Using explicit passed-in values, insert the thousands grouping 4513 into the string pointed to by buffer. 
For the argument descriptions, 4514 see Objects/stringlib/localeutil.h */ 4515 /// Availability: 3.* 4516 Py_ssize_t _PyUnicode_InsertThousandsGrouping( 4517 Py_UNICODE* buffer, 4518 Py_ssize_t n_buffer, 4519 Py_UNICODE* digits, 4520 Py_ssize_t n_digits, 4521 Py_ssize_t min_width, 4522 const(char)* grouping, 4523 const(char)* thousands_sep); 4524 } 4525 4526 version(Python_3_2_Or_Later) { 4527 /// Availability: >= 3.2 4528 PyObject* PyUnicode_TransformDecimalToASCII( 4529 Py_UNICODE *s, /* Unicode buffer */ 4530 Py_ssize_t length /* Number of Py_UNICODE chars to transform */ 4531 ); 4532 /* --- File system encoding ---------------------------------------------- */ 4533 4534 /** ParseTuple converter: encode str objects to bytes using 4535 PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */ 4536 /// Availability: >= 3.2 4537 int PyUnicode_FSConverter(PyObject*, void*); 4538 4539 /** ParseTuple converter: decode bytes objects to unicode using 4540 PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */ 4541 /// Availability: >= 3.2 4542 int PyUnicode_FSDecoder(PyObject*, void*); 4543 4544 /** Decode a null-terminated string using Py_FileSystemDefaultEncoding 4545 and the "surrogateescape" error handler. 4546 4547 If Py_FileSystemDefaultEncoding is not set, fall back to the locale 4548 encoding. 4549 4550 Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known. 4551 */ 4552 /// Availability: >= 3.2 4553 PyObject* PyUnicode_DecodeFSDefault( 4554 const(char)* s /* encoded string */ 4555 ); 4556 4557 /** Decode a string using Py_FileSystemDefaultEncoding 4558 and the "surrogateescape" error handler. 4559 4560 If Py_FileSystemDefaultEncoding is not set, fall back to the locale 4561 encoding. 
*/
    /// Availability: >= 3.2
    PyObject* PyUnicode_DecodeFSDefaultAndSize(
            const(char)* s,     /* encoded string */
            Py_ssize_t size     /* size */
            );

    /** Encode a Unicode object to Py_FileSystemDefaultEncoding with the
      "surrogateescape" error handler, and return bytes.

      If Py_FileSystemDefaultEncoding is not set, fall back to the locale
      encoding.
     */
    /// Availability: >= 3.2
    PyObject* PyUnicode_EncodeFSDefault(
            PyObject* unicode
            );
}

/*
alias _PyUnicode_IsWhitespace Py_UNICODE_ISSPACE;
alias _PyUnicode_IsLowercase Py_UNICODE_ISLOWER;
alias _PyUnicode_IsUppercase Py_UNICODE_ISUPPER;
alias _PyUnicode_IsTitlecase Py_UNICODE_ISTITLE;
alias _PyUnicode_IsLinebreak Py_UNICODE_ISLINEBREAK;
alias _PyUnicode_ToLowercase Py_UNICODE_TOLOWER;
alias _PyUnicode_ToUppercase Py_UNICODE_TOUPPER;
alias _PyUnicode_ToTitlecase Py_UNICODE_TOTITLE;
alias _PyUnicode_IsDecimalDigit Py_UNICODE_ISDECIMAL;
alias _PyUnicode_IsDigit Py_UNICODE_ISDIGIT;
alias _PyUnicode_IsNumeric Py_UNICODE_ISNUMERIC;
alias _PyUnicode_ToDecimalDigit Py_UNICODE_TODECIMAL;
alias _PyUnicode_ToDigit Py_UNICODE_TODIGIT;
alias _PyUnicode_ToNumeric Py_UNICODE_TONUMERIC;
alias _PyUnicode_IsAlpha Py_UNICODE_ISALPHA;
*/

/**
  Returns 1 if ch is reported as alphabetic, decimal, digit or numeric by
  the corresponding Py_UNICODE_IS* predicates, 0 otherwise.

  NOTE(review): within this part of the file the Py_UNICODE_IS* names only
  appear in the commented-out alias list above; presumably they are
  declared elsewhere - verify before instantiating this template.
 */
int Py_UNICODE_ISALNUM()(Py_UNICODE ch) {
    return (
            Py_UNICODE_ISALPHA(ch)
            || Py_UNICODE_ISDECIMAL(ch)
            || Py_UNICODE_ISDIGIT(ch)
            || Py_UNICODE_ISNUMERIC(ch)
           );
}

/**
  Copy length Py_UNICODE code units from source to target.
Params:
target = destination buffer
source = source buffer
length = number of Py_UNICODE code units (not bytes) to copy
 */
void Py_UNICODE_COPY()(void* target, void* source, size_t length) {
    // Keep the byte count in size_t: narrowing it to uint silently
    // truncates copies of 4 GiB or more on 64-bit targets.
    memcpy(target, source, length * Py_UNICODE.sizeof);
}

/**
  Store value into each of the first length code units of target.
Params:
target = buffer to fill
value = code unit to store
length = number of Py_UNICODE code units to write
 */
void Py_UNICODE_FILL()(Py_UNICODE* target, Py_UNICODE value, size_t length) {
    target[0 .. length] = value;
}

/**
  Returns 1 if the code units of substring equal those of string starting
  at offset, 0 otherwise.

  The first code unit is compared before calling memcmp as a cheap reject.
  No bounds checking is done here; the caller has to make sure that
  offset plus substring.length stays inside string's buffer.
 */
int Py_UNICODE_MATCH()(PyUnicodeObject* string, size_t offset,
        PyUnicodeObject* substring
        )
{
    return (
            (*(string.str + offset) == *(substring.str))
            && !memcmp(string.str + offset, substring.str,
                substring.length * Py_UNICODE.sizeof
                )
           );
}
