opennurbs_unicode.h
1 /* $NoKeywords: $ */
2 /*
3 //
4 // Copyright (c) 1993-2012 Robert McNeel & Associates. All rights reserved.
5 // OpenNURBS, Rhinoceros, and Rhino3D are registered trademarks of Robert
6 // McNeel & Associates.
7 //
8 // THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
9 // ALL IMPLIED WARRANTIES OF FITNESS FOR ANY PARTICULAR PURPOSE AND OF
10 // MERCHANTABILITY ARE HEREBY DISCLAIMED.
11 //
12 // For complete openNURBS copyright information see <http://www.opennurbs.org>.
13 //
14 ////////////////////////////////////////////////////////////////
15 */
16 
17 #if !defined(OPENNURBS_UNICODE_INC_)
18 #define OPENNURBS_UNICODE_INC_
19 
20 ON_BEGIN_EXTERNC
21 
22 enum ON_UnicodeEncoding
23 {
24  ON_UTF_unset=0, // encoding not set; also the value ON_IsUTFByteOrderMark() returns when the buffer is not a UTF BOM
25  ON_not_UTF, // not a UTF encoding
26  ON_UTF_8, // UTF-8 (byte oriented encoding; byte order does not apply)
27  ON_UTF_16, // UTF-16 in native CPU byte order
28  ON_UTF_16BE, // UTF-16 big endian byte order
29  ON_UTF_16LE, // UTF-16 little endian byte order
30  ON_UTF_32, // UTF-32 in native CPU byte order
31  ON_UTF_32BE, // UTF-32 big endian byte order
32  ON_UTF_32LE // UTF-32 little endian byte order
33 };
34 
35 enum ON_UnicodeCodePoint
36 {
37  // UTF-8 encodings:
38  // The UTF-8 encoding for codepoint values from 0 to 127 is a single byte (char).
39  // The UTF-8 encoding for codepoint values >= 128 requires multiple bytes.
40  // UTF-16 encodings:
41  // The UTF-16 encoding of every codepoint in this enum except Wastebasket is a single word (unsigned short).
42 
43  ON_NullCodePoint = 0x00, // NULL control U+0000 (decimal 0)
44  ON_Backspace = 0x08, // BACKSPACE control U+0008 (decimal 8)
45  ON_Tab = 0x09, // CHARACTER TABULATION control U+0009 (decimal 9)
46  ON_LineFeed = 0x0A, // LINE FEED control U+000A (decimal 10)
47  ON_VerticalTab = 0x0B, // LINE TABULATION control U+000B (decimal 11)
48  ON_FormFeed = 0x0C, // FORM FEED control U+000C (decimal 12)
49  ON_CarriageReturn = 0x0D, // CARRIAGE RETURN control U+000D (decimal 13)
50  ON_Escape = 0x1B, // ESCAPE control U+001B (decimal 27)
51  ON_Space = 0x20, // SPACE U+0020 (decimal 32)
52  ON_Slash = 0x2F, // SOLIDUS U+002F (decimal 47)
53  ON_Backslash = 0x5C, // REVERSE SOLIDUS U+005C (decimal 92)
54  ON_Pipe = 0x7C, // VERTICAL LINE U+007C (decimal 124)
55 
56  //
57  // NOTE: UTF-8 encodings of the codepoint values below this comment require multiple bytes.
58  //
59  ON_NextLine = 0x0085, // NEXT LINE (NEL) U+0085
60  ON_NoBreakSpace = 0x00A0, // NO-BREAK SPACE (NBSP) U+00A0
61  ON_NarrowNoBreakSpace = 0x202F, // NARROW NO-BREAK SPACE (NNBSP) U+202F
62  ON_ZeroWidthSpace = 0x200B, // ZERO WIDTH SPACE (ZWSP) U+200B
63 
64  //////////////////////////////////////////////////////////////
65  //
66  // Annotation symbols
67  //
68  ON_RadiusSymbol = 0x0052, // LATIN CAPITAL LETTER R U+0052 (decimal 82)
69  ON_DegreeSymbol = 0x00B0, // DEGREE SIGN U+00B0 (decimal 176)
70  ON_PlusMinusSymbol = 0x00B1, // PLUS-MINUS SIGN U+00B1 (decimal 177)
71  ON_DiameterSymbol = 0x00D8, // LATIN CAPITAL LETTER O WITH STROKE U+00D8 (decimal 216)
72 
73  //////////////////////////////////////////////////////////////
74  //
75  // Unambiguous format control code points
76  //
77  ON_LineSeparator = 0x2028, // LINE SEPARATOR U+2028 unambiguous line separator
78  ON_ParagraphSeparator = 0x2029, // PARAGRAPH SEPARATOR U+2029 unambiguous paragraph separator
79 
80  //////////////////////////////////////////////////////////////
81  //
82  // Greek, Cyrillic and CJK glyph code points used for testing purposes.
83  //
84  ON_GreekAlpha = 0x03B1, // GREEK SMALL LETTER ALPHA U+03B1
85  ON_CyrillicCapitalYu = 0x042E, // CYRILLIC CAPITAL LETTER YU U+042E
86  ON_SimplifiedChineseTree = 0x6881, // U+6881
87  ON_TraditionalChineseTree = 0x6A39, // U+6A39
88  ON_JapaneseRhinoceros = 0x7280, // U+7280
89  ON_JapaneseTree = 0x6728, // U+6728
90  ON_KoreanHan = 0xD55C, // U+D55C
91  ON_KoreanJeong = 0xC815, // U+C815
92 
93  //////////////////////////////////////////////////////////////
94  //
95  // Currency symbols
96  //
97  ON_DollarSign = 0x0024, // DOLLAR SIGN U+0024
98  ON_CentSign = 0x00A2, // CENT SIGN U+00A2
99  ON_PoundSign = 0x00A3, // POUND SIGN U+00A3
100  ON_CurrencySign = 0x00A4, // CURRENCY SIGN U+00A4
101  ON_YenSign = 0x00A5, // YEN SIGN U+00A5 (Chinese yuan, Japanese yen)
102  ON_EuroSign = 0x20AC, // EURO SIGN U+20AC
103  ON_PesoSign = 0x20B1, // PESO SIGN U+20B1
104  ON_RubleSign = 0x20BD, // RUBLE SIGN U+20BD
105 
106  //////////////////////////////////////////////////////////////
107  //
108  // RECYCLING SYMBOL is useful for testing symbol font substitution
109  //
110  ON_RecyclingSymbol = 0x2672, // UNIVERSAL RECYCLING SYMBOL U+2672 (decimal 9842)
111  ON_BlackRecyclingSymbol = 0x267B, // BLACK UNIVERSAL RECYCLING SYMBOL U+267B (decimal 9851)
112 
113  //////////////////////////////////////////////////////////////
114  //
115  // REPLACEMENT CHARACTER is the conventional glyph used
116  // to mark locations where UTF encodings contain invalid
117  // information.
118  //
119  ON_ReplacementCharacter = 0xFFFD, // REPLACEMENT CHARACTER U+FFFD (decimal 65533)
120 
121  //////////////////////////////////////////////////////////////
122  //
123  // WASTEBASKET (Good value for testing UTF-16 surrogate pair handling)
124  //
125  // wchar_t sWastebasket[] = {0xD83D,0xDDD1,0}; // correct on Windows. (Windows wchar_t strings are UTF-16 encoded).
126  // wchar_t sWastebasket[] = {0x1F5D1,0}; // correct on OS X (OS X wchar_t strings are UTF-32 encoded).
127  //
128  // WASTEBASKET UTF-8 encoding = (0xF0, 0x9F, 0x97, 0x91)
129  // WASTEBASKET UTF-16 encoding = ( 0xD83D, 0xDDD1 ) (surrogate pair)
130  ON_Wastebasket = 0x1F5D1, // WASTEBASKET U+1F5D1 (decimal 128465)
131 
132  //////////////////////////////////////////////////////////////
133  //
134  // Valid codepoint values are <= 0x10FFFF, so 0x110000 is the first value past the valid range.
135  // See ON_IsValidUnicodeCodePoint() for additional restrictions.
136  //
137  ON_InvalidCodePoint = 0x110000
138 };
139 
140 /*
141 Returns:
142  ON_UTF_16BE
143  The byte order of the CPU on which the function was run is big endian.
144  ON_UTF_16LE
145  The byte order of the CPU on which the function was run is little endian.
146 */
147 ON_DECL
148 enum ON_UnicodeEncoding ON_UnicodeNativeCPU_UTF16();
149 
150 /*
151 Returns:
152  ON_UTF_32BE
153  The byte order of the CPU on which the function was run is big endian.
154  ON_UTF_32LE
155  The byte order of the CPU on which the function was run is little endian.
156 */
157 ON_DECL
158 enum ON_UnicodeEncoding ON_UnicodeNativeCPU_UTF32();
159 
160 /*
161 Description:
162  Determine if the buffer begins with the values of a UTF BOM (byte order mark).
163 Parameters:
164  buffer - [in]
165  buffer to test
166  sizeof_buffer - [in]
167  number of bytes that can be examined in the buffer
168 Returns:
169  ON_UTF_unset (0)
170  buffer is not a UTF BOM
171  ON_UTF_8
172  sizeof_buffer >= 3 and the values of the first three bytes
173  are 0xEF, 0xBB, 0xBF.
174  ON_UTF_16BE
175  sizeof_buffer >= 2 and the values of the first two bytes
176  are 0xFE, 0xFF and, if sizeof_buffer >= 4, the value of
177  one of the third or fourth byte is not zero.
178  ON_UTF_16LE
179  sizeof_buffer >= 2 and the values of the first two bytes
180  are 0xFF, 0xFE. NOTE(review): 0xFF, 0xFE, 0x00, 0x00 is listed as ON_UTF_32LE below, so the "third or fourth byte is not zero" condition presumably applies to this case - confirm against the implementation.
181  ON_UTF_32BE
182  sizeof_buffer >= 4 and the values of the first four bytes
183  are 0x00, 0x00, 0xFE, 0xFF.
184  ON_UTF_32LE
185  sizeof_buffer >= 4 and the values of the first four bytes
186  are 0xFF, 0xFE, 0x00, 0x00.
187 */
188 ON_DECL
189 enum ON_UnicodeEncoding ON_IsUTFByteOrderMark(
190  const void* buffer,
191  size_t sizeof_buffer
192  );
193 
194 /*
195 Parameters:
196  e - [in] encoding whose byte order mark size is requested
197 Returns:
198  Number of bytes in the byte order mark for the specified encoding.
199 */
200 ON_DECL
201 unsigned int ON_UTFSizeofByteOrderMark(
202  enum ON_UnicodeEncoding e
203  );
204 
205 /*
206 Description:
207  Test a value to determine if it is a valid unicode code point value.
208 Parameters:
209  u - [in] value to test
210 Returns:
211  true: u is a valid unicode code point
212  false: u is not a valid unicode code point
213 Remarks:
214  Valid unicode code point values u satisfy the following (the range 0xD800 to 0xDFFF is excluded):
215  (0 <= u && u <= 0xD7FF) || (0xE000 <= u && u <= 0x10FFFF)
216 */
217 ON_DECL
218 int ON_IsValidUnicodeCodePoint(
219  ON__UINT32 u
220  );
221 
222 /*
223 Description:
224  Test a value to determine if it is a valid single element wchar_t value.
225 Parameters:
226  w - [in] value to test
227 Returns:
228  true:
229  w is a valid single wchar_t value
230  false:
231  w is not a valid single wchar_t value.
232  It may be a completely invalid value for a string.
233  When sizeof(w) < 4, it may also be a value used in a
234  multiple element encoding like a surrogate pair value.
235 Remarks:
236  Valid 1 byte wchar_t UTF-8 unicode code points are
237  (0 <= w && w <= 0x7F)
238  Valid 2 byte wchar_t UTF-16 unicode code points are
239  (0 <= w && w <= 0xD7FF) || (0xE000 <= w && w <= 0xFFFF)
240  Valid 4 byte wchar_t UTF-32 unicode code points are
241  (0 <= w && w <= 0xD7FF) || (0xE000 <= w && w <= 0x10FFFF)
242 */
243 ON_DECL
244 int ON_IsValidSingleElementWideCharValue(
245  wchar_t w
246  );
247 
248 ON_DECL // NOTE(review): undocumented in this header; presumably returns nonzero when (w1,w2) is a valid UTF-16 surrogate pair - confirm against the implementation.
249 int ON_IsValidUTF16SurrogatePair(
250  unsigned int w1, // [in] first element of the candidate pair (presumably the high surrogate - confirm)
251  unsigned int w2 // [in] second element of the candidate pair (presumably the low surrogate - confirm)
252  );
253 
254 /*
255 Description:
256  Test a value to determine if it is a valid UTF-32 value.
257 Parameters:
258  c - [in] value to test
259 Returns:
260  true: c is a valid UTF-32 value
261  false: c is not a valid UTF-32 value
262 Remarks:
263  Valid single element UTF-32 values are
264  (0 <= c && c <= 0xD7FF) || (0xE000 <= c && c <= 0x10FFFF)
265 */
266 ON_DECL
267 int ON_IsValidUTF32Value(
268  ON__UINT32 c
269  );
270 
271 
272 /*
273 Description:
274  Test a value to determine if it is a valid single element UTF-16 value.
275 Parameters:
276  c - [in] value to test
277 Returns:
278  true: c is a valid single element UTF-16 value
279  false:
280  c is not a valid single element UTF-16 value.
281  It may be a completely invalid value or it
282  may be a value used in a surrogate pair.
283 Remarks:
284  Valid single element UTF-16 values are
285  (0 <= c && c <= 0xD7FF) || (0xE000 <= c && c <= 0xFFFF)
286 */
287 ON_DECL
288 int ON_IsValidUTF16Singleton(
289  ON__UINT32 c
290  );
291 
292 // ON_DEPRECATED - NOTE(review): presumably the older name for ON_IsValidUTF16Singleton(); confirm before migrating callers.
293 ON_DECL
294 int ON_IsValidSingleElementUTF16Value(
295 ON__UINT32 c // [in] value to test
296 );
297 
298 
299 
300 /*
301 Description:
302  Test a char value to determine if it is a valid single byte UTF-8 value.
303 Parameters:
304  c - [in] value to test
305 Returns:
306  true: c is a valid single byte UTF-8 value
307  false: c is not a valid single byte UTF-8 value
308 Remarks:
309  Valid single byte UTF-8 values are (0 <= c && c <= 0x7F)
310 */
311 ON_DECL
312 int ON_IsValidUTF8SingletonChar(
313  char c
314  );
315 // ON_DEPRECATED - NOTE(review): presumably the older name for ON_IsValidUTF8SingletonChar(); confirm before migrating callers.
316 ON_DECL
317 int ON_IsValidSingleByteUTF8CharValue(
318  char c // [in] value to test
319  );
320 
321 
322 /*
323 Description:
324  Test a 32-bit unsigned value to determine if it is a valid single byte UTF-8 value.
325 Parameters:
326  c - [in] value to test
327 Returns:
328  true: c is a valid single byte UTF-8 value
329  false: c is not a valid single byte UTF-8 value
330 Remarks:
331  Valid single byte UTF-8 values are (0 <= c && c <= 0x7F)
332 */
333 ON_DECL
334 int ON_IsValidUTF8Singleton(
335  ON__UINT32 c
336  );
337 
338 // ON_DEPRECATED - NOTE(review): presumably the older name for ON_IsValidUTF8Singleton(); confirm before migrating callers.
339 ON_DECL
340 int ON_IsValidSingleElementUTF8Value(
341  ON__UINT32 c // [in] value to test
342  );
343 
345 struct ON_CLASS ON_UnicodeErrorParameters
346 {
347 #if defined(ON_CPLUSPLUS)
348  // Predefined settings, available only when compiled as C++.
349  static const ON_UnicodeErrorParameters MaskErrors; // m_error_status = 0, m_error_mask = 0xFFFFFFFF, m_error_code_point = ON_UnicodeCodePoint::ON_ReplacementCharacter
350  static const ON_UnicodeErrorParameters FailOnErrors; // m_error_status = 0, m_error_mask = 0, m_error_code_point = ON_UnicodeCodePoint::ON_ReplacementCharacter
351 #endif
352 
353  /*
354  If an error occurs, then bits of m_error_status are
355  set to indicate what type of error occurred.
356 
357  Error types:
358  1: The input parameters were invalid.
359  This error cannot be masked.
360 
361  2: The output buffer was not large enough to hold the converted
362  string. As much conversion as possible is performed in this
363  case and the error cannot be masked.
364 
365  4: When parsing a UTF-8 or UTF-32 string, the values of two
366  consecutive encoding sequences formed a valid UTF-16
367  surrogate pair.
368 
369  This error is masked if 0 != (4 & m_error_mask).
370  If the error is masked, then the surrogate pair is
371  decoded, the value of the resulting unicode code point
372  is used, and parsing continues.
373 
374  8: An overlong UTF-8 encoding sequence was encountered and
375  the value of the overlong UTF-8 sequence was a valid
376  unicode code point.
377 
378  This error is masked if 0 != (8 & m_error_mask).
379  If the error is masked, then the unicode code point
380  is used and parsing continues.
381 
382  16: An illegal UTF-8, UTF-16 or UTF-32 sequence occurred,
383  or an unsupported or invalid Windows code page value,
384  or an invalid unicode code point value resulted from
385  decoding a UTF-8 sequence.
386 
387  This error is masked if 0 != (16 & m_error_mask).
388  If the error is masked and the value of m_error_code_point is
389  a valid unicode code point, then m_error_code_point is used
390  and parsing continues.
391  */
392  unsigned int m_error_status;
393 
394  /*
395  If 0 != (m_error_mask & 4), then type 4 errors are masked.
396  If 0 != (m_error_mask & 8), then type 8 errors are masked.
397  If 0 != (m_error_mask & 16) and m_error_code_point is a valid unicode
398  code point value, then type 16 errors are masked.
399  */
400  unsigned int m_error_mask;
401 
402  /*
403  Unicode code point value to use when masking type 16 errors.
404  If 0 == (m_error_mask & 16), then this parameter is ignored.
405  ON_UnicodeCodePoint::ON_ReplacementCharacter (U+FFFD)
406  is a popular choice for the m_error_code_point value.
407  */
408  ON__UINT32 m_error_code_point;
409 };
410 
411 /*
412 Description:
413  Decode a UTF-32 little endian byte order string to get a single unicode code point.
414 Parameters:
415  sUTF32 - [in]
416  UTF-32 little endian byte order string to convert.
417 
418  sUTF32_count - [in]
419  number of ON__UINT32 elements in sUTF32[].
420 
421  e - [in/out]
422  If e is null, errors are not masked and parsing is performed
423  to the point where the first error occurs.
424  If e is not null, all errors are reported by setting the appropriate
425  e->m_error_status bits and errors are handled as described in the
426  definition of the ON_UnicodeErrorParameters struct.
427 
428  ON_UnicodeErrorParameters e = ON_UnicodeErrorParameters::MaskErrors;
429  or
430  ON_UnicodeErrorParameters e = ON_UnicodeErrorParameters::FailOnErrors;
431  are good ways to initialize the input values.
432 
433  unicode_code_point - [out]
434  The unicode_code_point pointer must not be null.
435  If a nonzero value is returned, then *unicode_code_point is
436  a valid unicode code point value in the CPU's native byte order.
437 Returns:
438  Number of elements of sUTF32 that were parsed.
439  0:
440  Nothing was decoded. The input value of *unicode_code_point
441  is not changed. See e->m_error_status.
442  1:
443  If no error occurred, then sUTF32[0] was decoded as a valid
444  UTF-32 value. See e for masked errors.
445  2:
446  sUTF32[0],sUTF32[1] had values of a valid UTF-16 surrogate pair
447  and e indicated to mask this error. The code point value decoded
448  from the surrogate pair was returned and e was set to indicate the error occurred.
449 */
450 ON_DECL
451 int ON_DecodeUTF32LE(
452  const ON__UINT32* sUTF32,
453  int sUTF32_count,
454  struct ON_UnicodeErrorParameters* e,
455  ON__UINT32* unicode_code_point
456  );
457 
458 /*
459 Description:
460  Decode a UTF-32 big endian byte order string to get a single unicode code point.
461 Parameters:
462  sUTF32 - [in]
463  UTF-32 big endian byte order string to convert.
464 
465  sUTF32_count - [in]
466  number of ON__UINT32 elements in sUTF32[].
467 
468  e - [in/out]
469  If e is null, errors are not masked and parsing is performed
470  to the point where the first error occurs.
471  If e is not null, all errors are reported by setting the appropriate
472  e->m_error_status bits and errors are handled as described in the
473  definition of the ON_UnicodeErrorParameters struct.
474 
475  ON_UnicodeErrorParameters e = ON_UnicodeErrorParameters::MaskErrors;
476  or
477  ON_UnicodeErrorParameters e = ON_UnicodeErrorParameters::FailOnErrors;
478  are good ways to initialize the input values.
479 
480  unicode_code_point - [out]
481  The unicode_code_point pointer must not be null.
482  If a nonzero value is returned, then *unicode_code_point is
483  a valid unicode code point value in the CPU's native byte order.
484 Returns:
485  Number of elements of sUTF32 that were parsed.
486  0:
487  Nothing was decoded. The input value of *unicode_code_point
488  is not changed. See e->m_error_status.
489  1:
490  If no error occurred, then sUTF32[0] was decoded as a valid
491  UTF-32 value. See e for masked errors.
492  2:
493  sUTF32[0],sUTF32[1] had values of a valid UTF-16 surrogate pair
494  and e indicated to mask this error. The code point value decoded
495  from the surrogate pair was returned and e was set to indicate the error occurred.
496 */
497 ON_DECL
498 int ON_DecodeUTF32BE(
499  const ON__UINT32* sUTF32,
500  int sUTF32_count,
501  struct ON_UnicodeErrorParameters* e,
502  ON__UINT32* unicode_code_point
503  );
504 
505 
506 /*
507 Description:
508  Decode a UTF-32 native byte order string to get a single unicode code point.
509 Parameters:
510  sUTF32 - [in]
511  UTF-32 native byte order string to convert.
512 
513  sUTF32_count - [in]
514  number of ON__UINT32 elements in sUTF32[].
515 
516  e - [in/out]
517  If e is null, errors are not masked and parsing is performed
518  to the point where the first error occurs.
519  If e is not null, all errors are reported by setting the appropriate
520  e->m_error_status bits and errors are handled as described in the
521  definition of the ON_UnicodeErrorParameters struct.
522 
523  ON_UnicodeErrorParameters e = ON_UnicodeErrorParameters::MaskErrors;
524  or
525  ON_UnicodeErrorParameters e = ON_UnicodeErrorParameters::FailOnErrors;
526  are good ways to initialize the input values.
527 
528  unicode_code_point - [out]
529  The unicode_code_point pointer must not be null.
530  If a nonzero value is returned, then *unicode_code_point is
531  a valid unicode code point value in the CPU's native byte order.
532 Returns:
533  Number of elements of sUTF32 that were parsed.
534  0:
535  Nothing was decoded. The input value of *unicode_code_point
536  is not changed. See e->m_error_status.
537  1:
538  If no error occurred, then sUTF32[0] was decoded as a valid
539  UTF-32 value. See e for masked errors.
540  2:
541  sUTF32[0],sUTF32[1] had values of a valid UTF-16 surrogate pair
542  and e indicated to mask this error. The code point value decoded
543  from the surrogate pair was returned and e was set to indicate the error occurred.
544 */
545 ON_DECL
546 int ON_DecodeUTF32(
547  const ON__UINT32* sUTF32,
548  int sUTF32_count,
549  struct ON_UnicodeErrorParameters* e,
550  ON__UINT32* unicode_code_point
551  );
552 
553 /*
554 Description:
555  Decode a UTF-32 string whose elements have byte order
556  opposite the native CPU's to get a single unicode code point.
557 Parameters:
558  sUTF32 - [in]
559  UTF-32 string to convert with byte order opposite the
560  CPU's native byte order.
561 
562  sUTF32_count - [in]
563  number of ON__UINT32 elements in sUTF32[].
564 
565  e - [in/out]
566  If e is null, errors are not masked and parsing is performed
567  to the point where the first error occurs.
568  If e is not null, all errors are reported by setting the appropriate
569  e->m_error_status bits and errors are handled as described in the
570  definition of the ON_UnicodeErrorParameters struct.
571 
572  ON_UnicodeErrorParameters e = ON_UnicodeErrorParameters::MaskErrors;
573  or
574  ON_UnicodeErrorParameters e = ON_UnicodeErrorParameters::FailOnErrors;
575  are good ways to initialize the input values.
576 
577  unicode_code_point - [out]
578  The unicode_code_point pointer must not be null.
579  If a nonzero value is returned, then *unicode_code_point is
580  a valid unicode code point value in the CPU's native byte order.
581 Returns:
582  Number of elements of sUTF32 that were parsed.
583  0:
584  Nothing was decoded. The input value of *unicode_code_point
585  is not changed. See e->m_error_status.
586  1:
587  If no error occurred, then sUTF32[0] was decoded as a valid
588  UTF-32 value. See e for masked errors.
589  2:
590  sUTF32[0],sUTF32[1] had values of a valid UTF-16 surrogate pair
591  and e indicated to mask this error. The code point value decoded
592  from the surrogate pair was returned and e was set to indicate the error occurred.
593 */
594 ON_DECL
595 int ON_DecodeSwapByteUTF32(
596  const ON__UINT32* sUTF32,
597  int sUTF32_count,
598  struct ON_UnicodeErrorParameters* e,
599  ON__UINT32* unicode_code_point
600  );
601 
602 /*
603 Description:
604  Convert a valid unicode code point value to its UTF-8 form and use the
605  same encoding calculation for other integers with values <= 0x7FFFFFFF.
606  When strict UTF-8 encoding is desired, the caller is responsible for
607  ensuring the value of u is a valid unicode code point.
608 Parameters:
609  u - [in]
610  Integer in the CPU's native byte order in the interval [0,2147483647].
611  sUTF8 - [out]
612  sUTF8 is a buffer of 6 char elements and the UTF-8 form
613  is returned in sUTF8[]. The returned value specifies how
614  many elements of sUTF8[] are set.
615 Returns:
616  0: u is too large (>=2^31) to be encoded.
617  No changes are made to the sUTF8[] values.
618  1: the UTF-8 form of u is 1 byte returned in sUTF8[0].
619  2: the UTF-8 form of u is 2 bytes returned in sUTF8[0],sUTF8[1].
620  3: the UTF-8 form of u is 3 bytes returned in sUTF8[0],sUTF8[1],sUTF8[2].
621  4: the UTF-8 form of u is 4 bytes returned in sUTF8[0],sUTF8[1],sUTF8[2],sUTF8[3].
622  Note: The maximum valid unicode code point is 0x10FFFF. Values of u > 0x10FFFF
623  and u <= 0x1FFFFF are encoded to 4 bytes using the same algorithm.
624  5: the Universal Character Set form of u
625  is 5 bytes returned in sUTF8[0],sUTF8[1],sUTF8[2],sUTF8[3],sUTF8[4].
626  6: the Universal Character Set form of u
627  is 6 bytes returned in sUTF8[0],sUTF8[1],sUTF8[2],sUTF8[3],sUTF8[4],sUTF8[5].
628  For return values requiring less than 6 bytes, no changes
629  are made to the unused bytes in sUTF8[].
630 Remarks:
631  Any integer in the range 0 to 2^31 - 1 can be encoded.
632  When a unicode string is being encoded take steps to ensure that
633  u is a valid unicode code point value.
634  The function ON_IsValidUnicodeCodePoint() can be used to determine
635  if u is a valid unicode code point value.
636 */
637 ON_DECL
638 int ON_EncodeUTF8( ON__UINT32 u, char sUTF8[6] );
639 
640 /*
641 Description:
642  Decode a UTF-8 encoded string to get a single unicode code point.
643 Parameters:
644  sUTF8 - [in]
645  UTF-8 string to convert.
646 
647  sUTF8_count - [in]
648  number of char elements in sUTF8[].
649 
650  e - [in/out]
651  If e is null, errors are not masked and parsing is performed
652  to the point where the first error occurs.
653  If e is not null, all errors are reported by setting the appropriate
654  e->m_error_status bits and errors are handled as described in the
655  definition of the ON_UnicodeErrorParameters struct.
656 
657  ON_UnicodeErrorParameters e = ON_UnicodeErrorParameters::MaskErrors;
658  or
659  ON_UnicodeErrorParameters e = ON_UnicodeErrorParameters::FailOnErrors;
660  are good ways to initialize the input values.
661 
662  unicode_code_point - [out]
663  The unicode_code_point pointer must not be null.
664  If a nonzero value is returned, then *unicode_code_point is
665  a valid unicode code point value.
666 Returns:
667  Number of elements of sUTF8 that were parsed.
668  0 indicates failure.
669 */
670 ON_DECL
671 int ON_DecodeUTF8(
672  const char* sUTF8,
673  int sUTF8_count,
674  struct ON_UnicodeErrorParameters* e,
675  ON__UINT32* unicode_code_point
676  );
677 
678 /*
679 Description:
680  Convert a 4 byte unicode code point value to its UTF-16 form.
681 Parameters:
682  unicode_code_point - [in]
683  4 byte unicode code point value in the CPU's native byte order.
684  Valid values are in the interval [0,0xD7FF] or the
685  interval [0xE000,0x10FFFF].
686  sUTF16 - [out]
687  sUTF16 is a buffer of 2 ON__UINT16 elements. If the UTF-16 form
688  is a single value, it is returned in sUTF16[0]. If the UTF-16 form
689  is a surrogate pair, the first code unit (high surrogate)
690  is returned in sUTF16[0] and the second unit (low surrogate) is
691  returned in sUTF16[1]. The returned values are in
692  the CPU's native byte order.
693 Returns:
694  0: unicode_code_point is not a valid Unicode code point. No changes are
695  made to the sUTF16[] values.
696  1: unicode_code_point is a valid Unicode code point with a UTF-16 form
697  consisting of the single value returned in sUTF16[0].
698  2: unicode_code_point is a valid Unicode code point with a UTF-16 form
699  consisting of a surrogate pair returned in sUTF16[0] and sUTF16[1].
700 */
701 ON_DECL
702 int ON_EncodeUTF16( ON__UINT32 unicode_code_point, ON__UINT16 sUTF16[2] );
703 
704 /*
705 Description:
706  Decode a UTF-16 little endian byte order string to get a single unicode code point.
707 Parameters:
708  sUTF16 - [in]
709  UTF-16 little endian byte order string to convert.
710 
711  sUTF16_count - [in]
712  number of ON__UINT16 elements in sUTF16[].
713 
714  e - [in/out]
715  If e is null, errors are not masked and parsing is performed
716  to the point where the first error occurs.
717  If e is not null, all errors are reported by setting the appropriate
718  e->m_error_status bits and errors are handled as described in the
719  definition of the ON_UnicodeErrorParameters struct.
720 
721  ON_UnicodeErrorParameters e = ON_UnicodeErrorParameters::MaskErrors;
722  or
723  ON_UnicodeErrorParameters e = ON_UnicodeErrorParameters::FailOnErrors;
724  are good ways to initialize the input values.
725 
726  unicode_code_point - [out]
727  The unicode_code_point pointer must not be null.
728  If a nonzero value is returned, then *unicode_code_point is
729  a valid unicode code point value in the CPU's native byte order.
730 Returns:
731  Number of elements of sUTF16 that were parsed.
732  0:
733  Nothing was decoded. The input value of *unicode_code_point
734  is not changed. See e->m_error_status.
735  1:
736  If no error occurred, then sUTF16[0] was decoded as a valid
737  UTF-16 singleton. See e for masked errors.
738  2:
739  If no error occurred, then sUTF16[0],sUTF16[1] was decoded
740  as a valid UTF-16 surrogate pair.
741  See e for masked errors.
742  n >= 3:
743  sUTF16[0],..,sUTF16[n-1] did not form a valid UTF-16 encoding
744  and were parsed as reasonably as possible.
745  See e for masked errors.
746 */
747 ON_DECL
748 int ON_DecodeUTF16LE(
749  const ON__UINT16* sUTF16,
750  int sUTF16_count,
751  struct ON_UnicodeErrorParameters* e,
752  ON__UINT32* unicode_code_point
753  );
754 
755 /*
756 Description:
757  Decode a UTF-16 big endian byte order string to get a single unicode code point.
758 Parameters:
759  sUTF16 - [in]
760  UTF-16 big endian byte order string to convert.
761 
762  sUTF16_count - [in]
763  number of ON__UINT16 elements in sUTF16[].
764 
765  e - [in/out]
766  If e is null, errors are not masked and parsing is performed
767  to the point where the first error occurs.
768  If e is not null, all errors are reported by setting the appropriate
769  e->m_error_status bits and errors are handled as described in the
770  definition of the ON_UnicodeErrorParameters struct.
771 
772  ON_UnicodeErrorParameters e = ON_UnicodeErrorParameters::MaskErrors;
773  or
774  ON_UnicodeErrorParameters e = ON_UnicodeErrorParameters::FailOnErrors;
775  are good ways to initialize the input values.
776 
777  unicode_code_point - [out]
778  The unicode_code_point pointer must not be null.
779  If a nonzero value is returned, then *unicode_code_point is
780  a valid unicode code point value in the CPU's native byte order.
781 Returns:
782  Number of elements of sUTF16 that were parsed.
783  0:
784  Nothing was decoded. The input value of *unicode_code_point
785  is not changed. See e->m_error_status.
786  1:
787  If no error occurred, then sUTF16[0] was decoded as a valid
788  UTF-16 singleton. See e for masked errors.
789  2:
790  If no error occurred, then sUTF16[0],sUTF16[1] was decoded
791  as a valid UTF-16 surrogate pair.
792  See e for masked errors.
793  n >= 3:
794  sUTF16[0],..,sUTF16[n-1] did not form a valid UTF-16 encoding
795  and were parsed as reasonably as possible.
796  See e for masked errors.
797 */
798 ON_DECL
799 int ON_DecodeUTF16BE(
800  const ON__UINT16* sUTF16,
801  int sUTF16_count,
802  struct ON_UnicodeErrorParameters* e,
803  ON__UINT32* unicode_code_point
804  );
805 
806 /*
807 Description:
808  Decode a UTF-16 string in native byte order to get a single unicode code point.
809 Parameters:
810  sUTF16 - [in]
811  UTF-16 string in native byte order to convert.
812 
813  sUTF16_count - [in]
814  number of ON__UINT16 elements in sUTF16[].
815 
816  e - [in/out]
817  If e is null, errors are not masked and parsing is performed
818  to the point where the first error occurs.
819  If e is not null, all errors are reported by setting the appropriate
820  e->m_error_status bits and errors are handled as described in the
821  definition of the ON_UnicodeErrorParameters struct.
822 
823  ON_UnicodeErrorParameters e = ON_UnicodeErrorParameters::MaskErrors;
824  or
825  ON_UnicodeErrorParameters e = ON_UnicodeErrorParameters::FailOnErrors;
826  are good ways to initialize the input values.
827 
828  unicode_code_point - [out]
829  The unicode_code_point pointer must not be null.
830  If a nonzero value is returned, then *unicode_code_point is
831  a valid unicode code point value in the CPU's native byte order.
832 Returns:
833  Number of elements of sUTF16 that were parsed.
834  0:
835  Nothing was decoded. The input value of *unicode_code_point
836  is not changed. See e->m_error_status.
837  1:
838  If no error occurred, then sUTF16[0] was decoded as a valid
839  UTF-16 singleton. See e for masked errors.
840  2:
841  If no error occurred, then sUTF16[0],sUTF16[1] was decoded
842  as a valid UTF-16 surrogate pair.
843  See e for masked errors.
844  n >= 3:
845  sUTF16[0],..,sUTF16[n-1] did not form a valid UTF-16 encoding
846  and were parsed as reasonably as possible.
847  See e for masked errors.
848 */
849 ON_DECL
850 int ON_DecodeUTF16(
851  const ON__UINT16* sUTF16,
852  int sUTF16_count,
853  struct ON_UnicodeErrorParameters* e,
854  ON__UINT32* unicode_code_point
855  );
856 
857 /*
858 Description:
859  Decode a UTF-16 encoded string whose elements have byte order
860  opposite the native byte order to get a single unicode code point.
861 Parameters:
862  sUTF16 - [in]
863  UTF-16 string to convert with byte order opposite the
864  CPU's native byte order.
865 
866  sUTF16_count - [in]
867  number of ON__UINT16 elements in sUTF16[].
868 
869  e - [in/out]
870  If e is null, errors are not masked and parsing is performed
871  to the point where the first error occurs.
872  If e is not null, all errors are reported by setting the appropriate
873  e->m_error_status bits and errors are handled as described in the
874  definition of the ON_UnicodeErrorParameters struct.
875 
876  ON_UnicodeErrorParameters e = ON_UnicodeErrorParameters::MaskErrors;
877  or
878  ON_UnicodeErrorParameters e = ON_UnicodeErrorParameters::FailOnErrors;
879  are good ways to initialize the input values.
880 
881  unicode_code_point - [out]
882  The unicode_code_point pointer must not be null.
883  If a nonzero value is returned, then *unicode_code_point is
884  a valid unicode code point value in the CPU's native byte order.
885 Returns:
886  Number of elements of sUTF16 that were parsed.
887  0:
888  Nothing was decoded. The input value of *unicode_code_point
889  is not changed. See e->m_error_status.
890  1:
891  If no error occurred, then sUTF16[0] was decoded as a valid
892  UTF-16 singleton. See e for masked errors.
893  2:
894  If no error occurred, then sUTF16[0],sUTF16[1] was decoded
895  as a valid UTF-16 surrogate pair.
896  See e for masked errors.
897  n >= 3:
898  sUTF16[0],..,sUTF16[n-1] did not form a valid UTF-16 encoding
899  and were parsed as reasonably as possible.
900  See e for masked errors.
901 */
902 ON_DECL
903 int ON_DecodeSwapByteUTF16(
904  const ON__UINT16* sUTF16,
905  int sUTF16_count,
906  struct ON_UnicodeErrorParameters* e,
907  ON__UINT32* unicode_code_point
908  );
909 
910 /*
911 Description:
912  Decode a wchar_t string (in the platform's wchar_t encoding)
913  to get a single unicode code point.
914 Parameters:
915  sWideChar - [in]
916  wchar_t string to convert.
917 
918  sWideChar_count - [in]
919  number of wchar_t elements in sWideChar[].
920 
921  e - [in/out]
922  If e is null, errors are not masked and parsing is performed
923  to the point where the first error occurs.
924  If e is not null, all errors are reported by setting the appropriate
925  e->m_error_status bits and errors are handled as described in the
926  definition of the ON_UnicodeErrorParameters struct.
927 
928  ON_UnicodeErrorParameters e = ON_UnicodeErrorParameters::MaskErrors;
929  or
930  ON_UnicodeErrorParameters e = ON_UnicodeErrorParameters::FailOnErrors;
931  are good ways to initialize the input values.
932 
933  unicode_code_point - [out]
934  The unicode_code_point pointer must not be null.
935  If a nonzero value is returned, then *unicode_code_point is
936  a valid unicode code point value in the CPU's native byte order.
937 Returns:
938  Number of elements of sWideChar that were parsed.
939  0:
940  Nothing was decoded. The input value of *unicode_code_point
941  is not changed. See e->m_error_status.
942  1:
943  If no error occurred, then sWideChar[0] was decoded as a valid
944  wchar_t singleton. See e for masked errors.
945  n>=2:
946  If no error occurred, then sWideChar[0],..,sWideChar[n-1] was decoded
947  as a valid wchar_t multi-element encoding.
948  Typically, a UTF-16 surrogate pair or a UTF-8 multi-byte sequence.
949  See e for masked errors.
950 */
951 ON_DECL
952 int ON_DecodeWideChar(
953  const wchar_t* sWideChar,
954  int sWideChar_count,
955  struct ON_UnicodeErrorParameters* e,
956  ON__UINT32* unicode_code_point
957  );
958 
959 
960 /*
961 Description:
962  Convert an RTF charset value to a Windows code page.
963  This conversion is part of the process to get a UNICODE encoding of strings in RTF files.
964 
965 Parameters:
966  rtf_charset - [in]
967  The RTF charset specified by \fcharsetN in the RTF font table.
968  default_code_page - [in]
969  Value to return if none is associated with the input rtf_charset value.
970 
971 Returns:
972  The Windows code page associated with rtf_charset, or default_code_page if none is associated.
973 
974 Example:
975 
976  The RTF:
977  ...
978  {\fonttbl
979  ...
980  {\f2\fcharset129 Malgun Gothic;}
981  ...
982  }
983  ...
984  {\f2 {\ltrch \'c7\'d1\'b1\'bd...}
985  ...
986 
987  Uses RTF charset 129 which maps to Windows code page 949. This means
988  {0xC7,0xD1,0xB1,0xBD, ... } needs to be parsed as a code page 949 multibyte encoding.
989  The function ON_MapWindowsCodePage949ToUnicode() can be used to convert
990  Windows code page 949 values to UNICODE code point values.
991 
992  code page 949 0xC7D1 -> U+D55C #HANGUL SYLLABLE HIEUH A NIEUN
993  code page 949 0xB1BD -> U+AD75 #HANGUL SYLLABLE KIYEOK U RIEULKIYEOK
994 
995  NOTE WELL:
996  The Windows code page 949 encoding uses both single and double byte encodings.
997  When the initial byte has a value < 0x80, it is a single byte encoding.
998  When the initial byte has a value > 0x80, it is a double byte encoding.
999 
1000 Remarks:
1001  Conversions are based on the Rich Text Format (RTF) Specification Version 1.9.1
1002 */
1003 ON_DECL
1004 ON__UINT32 ON_MapRTFcharsetToWindowsCodePage(
1005  ON__UINT32 rtf_charset,
1006  ON__UINT32 default_code_page
1007 );
1008 
1009 /*
1010 Description:
1011  Get a pointer to an array of 128 UNICODE code point values that are the best fit
1012  for Microsoft single byte code page encodings of 0x80 to 0xFF inclusive.
1013 
1014 Parameters:
1015  code_page - [in]
1016  A Microsoft single byte code page value. (1252, 10000, etc)
1017 Returns:
1018  If code_page identifies a supported single byte code page, then an array
1019  of 128 UNICODE code points sorted by single byte encoding is returned.
1020  If a single byte encoding is not defined, the corresponding element is presumably ON_UnicodeCodePoint::ON_ReplacementCharacter (NOTE(review): the original sentence was truncated - confirm against the implementation).
1021  Otherwise nullptr is returned.
1022 
1023 Example:
1024 
1025  const ON__UINT32 code_page = ...;
1026  ON__UINT32 cp_encoding = ...;
1027  const ON__UINT32* cp_to_unicode = ON_MSSBCP_0x80_0xFF_Unicode(code_page);
1028  ON__UINT32 unicode_code_point
1029  = (nullptr != cp_to_unicode && cp_encoding >= 0x80 && cp_encoding <= 0xFF)
1030  ? cp_to_unicode[cp_encoding - 0x80]
1031  : ON_UnicodeCodePoint::ON_ReplacementCharacter;
1032 
1033 */
1034 ON_DECL
1035 const ON__UINT32* ON_MSSBCP_0x80_0xFF_Unicode(
1036  ON__UINT32 code_page
1037  );
1038 
1039 /*
1040 Description:
1041  Convert a Microsoft single byte code page value to a UNICODE code point.
1042  Values 0x20 to 0x7E are the same as the ASCII encoding.
1043 
1044 Parameters:
1045  code_page - [in]
1046  A Microsoft single byte code page value. (1252, 10000, etc)
1047  code_page_single_byte_encoding - [in]
1048  A single byte encoding of the desired glyph.
1049 
1050 Returns:
1051  If code_page and code_page_single_byte_encoding are valid, then
1052  the best fit unicode code point is returned.
1053  Otherwise ON_UnicodeCodePoint::ON_ReplacementCharacter ( 0xFFFD ) is returned.
1054 */
1055 ON_DECL
1056 ON__UINT32 ON_MapMSSBCPToUnicode(
1057  ON__UINT32 code_page,
1058  ON__UINT32 code_page_single_byte_encoding
1059 );
1060 
1061 /*
1062 Description:
1063  Convert a Unicode code point to a Microsoft single byte code page character value (code page 1252, for example).
1064  Windows code page 1252 is a single byte encoding.
1065  Values 0x20 to 0x7E are the same as the ASCII encoding.
1066 
1067  This function is used to find fonts where glyphs are identified by code page 1252 values.
1068 
1069 Parameters:
1070  code_page - [in]
1071  A Microsoft single byte code page value. (1252, 10000, etc)
1072  unicode_code_point - [in]
1073  UNICODE code point
1074 
1075 Returns:
1076  If unicode_code_point has a corresponding single byte encoding on the specified code page,
1077  then the single byte encoding is returned.
1078  Otherwise 0xFFFFFFFF is returned.
1079 */
1080 ON_DECL
1081 ON__UINT32 ON_MapUnicodeToMSSBCP(
1082  ON__UINT32 code_page,
1083  ON__UINT32 unicode_code_point
1084 );
1085 
1086 /*
1087 Description:
1088  Convert a unicode code point value to its wide char form.
1089 Parameters:
1090  code_point - [in]
1091  Unicode code point in the CPU's native byte order.
1092  w_capacity - [in]
1093  If the platform wchar_t encoding is UTF-32, then w_capacity >= 1 is sufficient.
1094  If the platform wchar_t encoding is UTF-16, then w_capacity >= 2 is sufficient.
1095  If the platform wchar_t encoding is UTF-8, then w_capacity >= 6 is sufficient.
1096  w - [out]
1097  w is a buffer of w_capacity wchar_t elements and the wide char
1098  encoding of code_point is returned in w[].
1099  The returned value specifies how many elements of w[] are set.
1100  When w_capacity > the returned value, the encoding is null terminated.
1101 Returns:
1102  0: Invalid input (code_point is not a valid Unicode code point or
1103  w is nullptr).
1104  No changes are made to the w[] values.
1105  1: the wchar_t encoding of code_point is 1 wchar_t element returned in w[0].
1106  2: the wchar_t encoding form of code_point is 2 wchar_t elements returned in w[0],w[1].
1107  3: the UTF-8 wchar_t encoding form of code_point is 3 wchar_t elements returned in w[0],w[1],w[2].
1108  4: the UTF-8 wchar_t encoding form of code_point is 4 wchar_t elements returned in w[0],w[1],w[2],w[3].
1109  5: the UTF-8 wchar_t encoding form of code_point is 5 wchar_t elements returned in w[0],w[1],w[2],w[3],w[4].
1110  6: the UTF-8 wchar_t encoding form of code_point is 6 wchar_t elements returned in w[0],w[1],w[2],w[3],w[4],w[5].
1111 */
1112 ON_DECL
1113 int ON_EncodeWideChar(
1114  ON__UINT32 code_point,
1115  size_t w_capacity,
1116  wchar_t* w
1117  );
1118 
1119 /*
1120 Description:
1121  Convert a unicode string from a UTF-8 encoded char array
1122  into a UTF-8 encoded char array. This function can be
1123  used to clean UTF-8 strings that have a leading
1124  byte-order-mark (BOM) or contain encoding errors.
1125 
1126 Parameters:
1127  bTestByteOrder - [in]
1128  If bTestByteOrder is true and the input buffer begins with a
1129  byte order mark (BOM), then the BOM is skipped. If the value
1130  of the BOM is byte swapped, then subsequent input elements are
1131  byte swapped before being decoded. Specifically:
1132  - If the size of an input buffer element is 1 byte and the
1133  values of the first three input elements are a UTF-8 BOM
1134  (0xEF, 0xBB, 0xBF), then the first three input elements are
1135  ignored and decoding begins at the fourth input element.
1136  - If the size of an input buffer element is 2 bytes and the value
1137  of the first element is a UTF-16 BOM (0xFEFF), then the first
1138  element is ignored and decoding begins with the second element.
1139  - If the size of an input buffer element is 2 bytes and the value
1140  of the first element is a byte swapped UTF-16 BOM (0xFFFE),
1141  then the first element is ignored, decoding begins with the
1142  second element, and input element bytes are swapped before
1143  being decoded.
1144  - If the size of an input buffer element is 4 bytes and the value
1145  of the first element is a UTF-32 BOM (0x0000FEFF), then the
1146  first element is ignored and decoding begins with the second
1147  element.
1148  - If the size of an input buffer element is 4 bytes and the value
1149  of the first element is a byte swapped UTF-32 BOM (0xFFFE0000),
1150  then the first element is ignored, decoding begins with the
1151  second element, and input element bytes are swapped before
1152  being decoded.
1153  - In all other cases the first element of the input buffer is
1154  decoded and no byte swapping is performed.
1155 
1156  sInputUTF8 - [in]
1157  UTF-8 encoded string to convert.
1158 
1159  sInputUTF8_count - [in]
1160  If sInputUTF8_count >= 0, then it specifies the number of
1161  char elements in sInputUTF8[] to convert.
1162 
1163  If sInputUTF8_count == -1, then sInputUTF8 must be a null
1164  terminated string and all the elements up to the first
1165  null element are converted.
1166 
1167  sOutputUTF8 - [out]
1168  If sOutputUTF8 is not null and sOutputUTF8_count > 0, then
1169  the output UTF-8 encoded string is returned in this buffer.
1170  If there is room for the null terminator, the converted string
1171  will be null terminated. The null terminator is never included
1172  in the count returned by this function. No byte order mark is
1173  prepended.
1174 
1175  sOutputUTF8_count - [in]
1176  If sOutputUTF8_count > 0, then it specifies the number of available
1177  char elements in the sOutputUTF8[] buffer.
1178 
1179  If sOutputUTF8_count == 0, then the sOutputUTF8 parameter is ignored.
1180 
1181  error_status - [out]
1182  If error_status is not null, then bits of *error_status are
1183  set to indicate the success or failure of the conversion.
1184  When the error_mask parameter is used to mask some
1185  conversion errors, multiple bits may be set.
1186  0: Successful conversion with no errors.
1187  1: The input parameters were invalid.
1188  This error cannot be masked.
1189  2: The output buffer was not large enough to hold the converted
1190  string. As much conversion as possible is performed in this
1191  case and the error cannot be masked.
1192  4: When parsing a UTF-8 or UTF-32 string, the values of two
1193  consecutive encoding sequences formed a valid UTF-16
1194  surrogate pair.
1195  This error is masked if 0 != (4 & error_mask).
1196  If the error is masked, then the surrogate pair is
1197  decoded, the value of the resulting unicode code point
1198  is used, and parsing continues.
1199  8: An overlong UTF-8 encoding sequence was encountered and
1200  the value of the overlong UTF-8 sequence was a valid
1201  unicode code point.
1202  This error is masked if 0 != (8 & error_mask).
1203  If the error is masked, then the unicode code point
1204  is used and parsing continues.
1205  16: An illegal UTF-8, UTF-16 or UTF-32 encoding sequence occurred
1206  or an invalid unicode code point value resulted from decoding
1207  a UTF-8 sequence.
1208  This error is masked if 0 != (16 & error_mask).
1209  If the error is masked and the value of error_code_point is
1210  a valid unicode code point, then error_code_point is encoded
1211  in the output string and parsing continues.
1212 
1213  error_mask - [in]
1214  If 0 != (error_mask & 4), then type 4 errors are masked.
1215  If 0 != (error_mask & 8), then type 8 errors are masked.
1216  If 0 != (error_mask & 16) and error_code_point is a valid unicode
1217  code point value, then type 16 errors are masked.
1218 
1219  error_code_point - [in]
1220  Unicode code point value to use when masking type 16 errors.
1221  If 0 == (error_mask & 16), then this parameter is ignored.
1222  ON_UnicodeCodePoint::ON_ReplacementCharacter (U+FFFD)
1223  is a popular choice for the error_code_point value.
1224 
1225  sNextInputUTF8 - [out]
1226  If sNextInputUTF8 is not null, then *sNextInputUTF8 points to
1227  the first element in the input sInputUTF8[] buffer that was not
1228  converted.
1229 
1230  If an error occurs and is not masked, then *sNextInputUTF8 points
1231  to the element of sInputUTF8[] where the conversion failed.
1232  If no errors occur or all errors are masked, then
1233  *sNextInputUTF8 points to sInputUTF8 + sInputUTF8_count.
1234 
1235 Returns:
1236  If sOutputUTF8_count > 0, the return value is the number of char
1237  elements written to sOutputUTF8[]. When the return value < sOutputUTF8_count,
1238  a null terminator is written to sOutputUTF8[return value].
1239 
1240  If sOutputUTF8_count == 0, the return value is the minimum number of
1241  char elements that are needed to hold the converted string.
1242  The return value does not include room for a null terminator.
1243  Increment the return value by one if you want to have an element
1244  to use for a null terminator.
1245 */
1246 ON_DECL
1247 int ON_ConvertUTF8ToUTF8(
1248  int bTestByteOrder,
1249  const char* sInputUTF8,
1250  int sInputUTF8_count,
1251  char* sOutputUTF8,
1252  int sOutputUTF8_count,
1253  unsigned int* error_status,
1254  unsigned int error_mask,
1255  ON__UINT32 error_code_point,
1256  const char** sNextInputUTF8
1257  );
1258 
1259 /*
1260 Description:
1261  Convert a unicode string from a UTF-8 encoded char array
1262  into a UTF-16 encoded ON__UINT16 array.
1263 
1264 Parameters:
1265  bTestByteOrder - [in]
1266  If bTestByteOrder is true and the input buffer begins with a
1267  byte order mark (BOM), then the BOM is skipped. If the value
1268  of the BOM is byte swapped, then subsequent input elements are
1269  byte swapped before being decoded. Specifically:
1270  - If the size of an input buffer element is 1 byte and the
1271  values of the first three input elements are a UTF-8 BOM
1272  (0xEF, 0xBB, 0xBF), then the first three input elements are
1273  ignored and decoding begins at the fourth input element.
1274  - If the size of an input buffer element is 2 bytes and the value
1275  of the first element is a UTF-16 BOM (0xFEFF), then the first
1276  element is ignored and decoding begins with the second element.
1277  - If the size of an input buffer element is 2 bytes and the value
1278  of the first element is a byte swapped UTF-16 BOM (0xFFFE),
1279  then the first element is ignored, decoding begins with the
1280  second element, and input element bytes are swapped before
1281  being decoded.
1282  - If the size of an input buffer element is 4 bytes and the value
1283  of the first element is a UTF-32 BOM (0x0000FEFF), then the
1284  first element is ignored and decoding begins with the second
1285  element.
1286  - If the size of an input buffer element is 4 bytes and the value
1287  of the first element is a byte swapped UTF-32 BOM (0xFFFE0000),
1288  then the first element is ignored, decoding begins with the
1289  second element, and input element bytes are swapped before
1290  being decoded.
1291  - In all other cases the first element of the input buffer is
1292  decoded and no byte swapping is performed.
1293 
1294  sUTF8 - [in]
1295  UTF-8 string to convert.
1296 
1297  sUTF8_count - [in]
1298  If sUTF8_count >= 0, then it specifies the number of
1299  char elements in sUTF8[] to convert.
1300 
1301  If sUTF8_count == -1, then sUTF8 must be a null terminated
1302  string and all the elements up to the first null element are
1303  converted.
1304 
1305  sUTF16 - [out]
1306  If sUTF16 is not null and sUTF16_count > 0, then the UTF-16
1307  encoded string is returned in this buffer. If there is room
1308  for the null terminator, the converted string will be null
1309  terminated. The null terminator is never included in the count
1310  returned by this function. The converted string is in the
1311  CPU's native byte order. No byte order mark is prepended.
1312 
1313  sUTF16_count - [in]
1314  If sUTF16_count > 0, then it specifies the number of available
1315  ON__UINT16 elements in the sUTF16[] buffer.
1316 
1317  If sUTF16_count == 0, then the sUTF16 parameter is ignored.
1318 
1319  error_status - [out]
1320  If error_status is not null, then bits of *error_status are
1321  set to indicate the success or failure of the conversion.
1322  When the error_mask parameter is used to mask some
1323  conversion errors, multiple bits may be set.
1324  0: Successful conversion with no errors.
1325  1: The input parameters were invalid.
1326  This error cannot be masked.
1327  2: The output buffer was not large enough to hold the converted
1328  string. As much conversion as possible is performed in this
1329  case and the error cannot be masked.
1330  4: When parsing a UTF-8 or UTF-32 string, the values of two
1331  consecutive encoding sequences formed a valid UTF-16
1332  surrogate pair.
1333  This error is masked if 0 != (4 & error_mask).
1334  If the error is masked, then the surrogate pair is
1335  decoded, the value of the resulting unicode code point
1336  is used, and parsing continues.
1337  8: An overlong UTF-8 encoding sequence was encountered and
1338  the value of the overlong UTF-8 sequence was a valid
1339  unicode code point.
1340  This error is masked if 0 != (8 & error_mask).
1341  If the error is masked, then the unicode code point
1342  is used and parsing continues.
1343  16: An illegal UTF-8, UTF-16 or UTF-32 encoding sequence occurred
1344  or an invalid unicode code point value resulted from decoding
1345  a UTF-8 sequence.
1346  This error is masked if 0 != (16 & error_mask).
1347  If the error is masked and the value of error_code_point is
1348  a valid unicode code point, then error_code_point is encoded
1349  in the output string and parsing continues.
1350 
1351  error_mask - [in]
1352  If 0 != (error_mask & 4), then type 4 errors are masked.
1353  If 0 != (error_mask & 8), then type 8 errors are masked.
1354  If 0 != (error_mask & 16) and error_code_point is a valid unicode
1355  code point value, then type 16 errors are masked.
1356 
1357  error_code_point - [in]
1358  Unicode code point value to use when masking type 16 errors.
1359  If 0 == (error_mask & 16), then this parameter is ignored.
1360  ON_UnicodeCodePoint::ON_ReplacementCharacter (U+FFFD)
1361  is a popular choice for the error_code_point value.
1362 
1363  sNextUTF8 - [out]
1364  If sNextUTF8 is not null, then *sNextUTF8 points to the first
1365  element in the input sUTF8[] buffer that was not converted.
1366 
1367  If an error occurs and is not masked, then *sNextUTF8 points to
1368  the element of sUTF8[] where the conversion failed. If no errors
1369  occur or all errors are masked, then *sNextUTF8 points to
1370  sUTF8 + sUTF8_count.
1371 
1372 Returns:
1373  If sUTF16_count > 0, the return value is the number of ON__UINT16
1374  elements written to sUTF16[]. When the return value < sUTF16_count,
1375  a null terminator is written to sUTF16[return value].
1376 
1377  If sUTF16_count == 0, the return value is the minimum number of
1378  ON__UINT16 elements that are needed to hold the converted string.
1379  The return value does not include room for a null terminator.
1380  Increment the return value by one if you want to have an element
1381  to use for a null terminator.
1382 */
1383 ON_DECL
1384 int ON_ConvertUTF8ToUTF16(
1385  int bTestByteOrder,
1386  const char* sUTF8,
1387  int sUTF8_count,
1388  ON__UINT16* sUTF16,
1389  int sUTF16_count,
1390  unsigned int* error_status,
1391  unsigned int error_mask,
1392  ON__UINT32 error_code_point,
1393  const char** sNextUTF8
1394  );
1395 
1396 /*
1397 Description:
1398  Convert a unicode string from a UTF-8 encoded char array
1399  into a UTF-32 encoded ON__UINT32 array.
1400 
1401 Parameters:
1402  bTestByteOrder - [in]
1403  If bTestByteOrder is true and the input buffer begins with a
1404  byte order mark (BOM), then the BOM is skipped. If the value
1405  of the BOM is byte swapped, then subsequent input elements are
1406  byte swapped before being decoded. Specifically:
1407  - If the size of an input buffer element is 1 byte and the
1408  values of the first three input elements are a UTF-8 BOM
1409  (0xEF, 0xBB, 0xBF), then the first three input elements are
1410  ignored and decoding begins at the fourth input element.
1411  - If the size of an input buffer element is 2 bytes and the value
1412  of the first element is a UTF-16 BOM (0xFEFF), then the first
1413  element is ignored and decoding begins with the second element.
1414  - If the size of an input buffer element is 2 bytes and the value
1415  of the first element is a byte swapped UTF-16 BOM (0xFFFE),
1416  then the first element is ignored, decoding begins with the
1417  second element, and input element bytes are swapped before
1418  being decoded.
1419  - If the size of an input buffer element is 4 bytes and the value
1420  of the first element is a UTF-32 BOM (0x0000FEFF), then the
1421  first element is ignored and decoding begins with the second
1422  element.
1423  - If the size of an input buffer element is 4 bytes and the value
1424  of the first element is a byte swapped UTF-32 BOM (0xFFFE0000),
1425  then the first element is ignored, decoding begins with the
1426  second element, and input element bytes are swapped before
1427  being decoded.
1428  - In all other cases the first element of the input buffer is
1429  decoded and no byte swapping is performed.
1430 
1431  sUTF8 - [in]
1432  UTF-8 string to convert.
1433 
1434  sUTF8_count - [in]
1435  If sUTF8_count >= 0, then it specifies the number of
1436  char elements in sUTF8[] to convert.
1437 
1438  If sUTF8_count == -1, then sUTF8 must be a null terminated
1439  string and all the elements up to the first null element are
1440  converted.
1441 
1442  sUTF32 - [out]
1443  If sUTF32 is not null and sUTF32_count > 0, then the UTF-32
1444  encoded string is returned in this buffer. If there is room
1445  for the null terminator, the converted string will be null
1446  terminated. The null terminator is never included in the count
1447  returned by this function. The converted string is in the
1448  CPU's native byte order. No byte order mark is prepended.
1449 
1450  sUTF32_count - [in]
1451  If sUTF32_count > 0, then it specifies the number of available
1452  ON__UINT32 elements in the sUTF32[] buffer.
1453 
1454  If sUTF32_count == 0, then the sUTF32 parameter is ignored.
1455 
1456  error_status - [out]
1457  If error_status is not null, then bits of *error_status are
1458  set to indicate the success or failure of the conversion.
1459  When the error_mask parameter is used to mask some
1460  conversion errors, multiple bits may be set.
1461  0: Successful conversion with no errors.
1462  1: The input parameters were invalid.
1463  This error cannot be masked.
1464  2: The output buffer was not large enough to hold the converted
1465  string. As much conversion as possible is performed in this
1466  case and the error cannot be masked.
1467  4: When parsing a UTF-8 or UTF-32 string, the values of two
1468  consecutive encoding sequences formed a valid UTF-16
1469  surrogate pair.
1470  This error is masked if 0 != (4 & error_mask).
1471  If the error is masked, then the surrogate pair is
1472  decoded, the value of the resulting unicode code point
1473  is used, and parsing continues.
1474  8: An overlong UTF-8 encoding sequence was encountered and
1475  the value of the overlong UTF-8 sequence was a valid
1476  unicode code point.
1477  This error is masked if 0 != (8 & error_mask).
1478  If the error is masked, then the unicode code point
1479  is used and parsing continues.
1480  16: An illegal UTF-8, UTF-16 or UTF-32 encoding sequence occurred
1481  or an invalid unicode code point value resulted from decoding
1482  a UTF-8 sequence.
1483  This error is masked if 0 != (16 & error_mask).
1484  If the error is masked and the value of error_code_point is
1485  a valid unicode code point, then error_code_point is encoded
1486  in the output string and parsing continues.
1487 
1488  error_mask - [in]
1489  If 0 != (error_mask & 4), then type 4 errors are masked.
1490  If 0 != (error_mask & 8), then type 8 errors are masked.
1491  If 0 != (error_mask & 16) and error_code_point is a valid unicode
1492  code point value, then type 16 errors are masked.
1493 
1494  error_code_point - [in]
1495  Unicode code point value to use when masking type 16 errors.
1496  If 0 == (error_mask & 16), then this parameter is ignored.
1497  ON_UnicodeCodePoint::ON_ReplacementCharacter (U+FFFD)
1498  is a popular choice for the error_code_point value.
1499 
1500  sNextUTF8 - [out]
1501  If sNextUTF8 is not null, then *sNextUTF8 points to the first
1502  element in the input sUTF8[] buffer that was not converted.
1503 
1504  If an error occurs and is not masked, then *sNextUTF8 points to
1505  the element of sUTF8[] where the conversion failed. If no errors
1506  occur or all errors are masked, then *sNextUTF8 points to
1507  sUTF8 + sUTF8_count.
1508 
1509 Returns:
1510  If sUTF32_count > 0, the return value is the number of ON__UINT32
1511  elements written to sUTF32[]. When the return value < sUTF32_count,
1512  a null terminator is written to sUTF32[return value].
1513 
1514  If sUTF32_count == 0, the return value is the minimum number of
1515  ON__UINT32 elements that are needed to hold the converted string.
1516  The return value does not include room for a null terminator.
1517  Increment the return value by one if you want to have an element
1518  to use for a null terminator.
1519 */
1520 ON_DECL
1521 int ON_ConvertUTF8ToUTF32(
1522  int bTestByteOrder,
1523  const char* sUTF8,
1524  int sUTF8_count,
1525  ON__UINT32* sUTF32,
1526  int sUTF32_count,
1527  unsigned int* error_status,
1528  unsigned int error_mask,
1529  ON__UINT32 error_code_point,
1530  const char** sNextUTF8
1531  );
1532 
1533 /*
1534 Description:
1535  Convert a unicode string from a UTF-16 encoded ON__UINT16 array
1536  into a UTF-8 encoded char array.
1537 
1538 Parameters:
1539  bTestByteOrder - [in]
1540  If bTestByteOrder is true and the input buffer begins with a
1541  byte order mark (BOM), then the BOM is skipped. If the value
1542  of the BOM is byte swapped, then subsequent input elements are
1543  byte swapped before being decoded. Specifically:
1544  - If the size of an input buffer element is 1 byte and the
1545  values of the first three input elements are a UTF-8 BOM
1546  (0xEF, 0xBB, 0xBF), then the first three input elements are
1547  ignored and decoding begins at the fourth input element.
1548  - If the size of an input buffer element is 2 bytes and the value
1549  of the first element is a UTF-16 BOM (0xFEFF), then the first
1550  element is ignored and decoding begins with the second element.
1551  - If the size of an input buffer element is 2 bytes and the value
1552  of the first element is a byte swapped UTF-16 BOM (0xFFFE),
1553  then the first element is ignored, decoding begins with the
1554  second element, and input element bytes are swapped before
1555  being decoded.
1556  - If the size of an input buffer element is 4 bytes and the value
1557  of the first element is a UTF-32 BOM (0x0000FEFF), then the
1558  first element is ignored and decoding begins with the second
1559  element.
1560  - If the size of an input buffer element is 4 bytes and the value
1561  of the first element is a byte swapped UTF-32 BOM (0xFFFE0000),
1562  then the first element is ignored, decoding begins with the
1563  second element, and input element bytes are swapped before
1564  being decoded.
1565  - In all other cases the first element of the input buffer is
1566  decoded and no byte swapping is performed.
1567 
1568  sUTF16 - [in]
1569  UTF-16 string to convert.
1570 
1571  If bTestByteOrder is true and the first element of sUTF16[]
1572  is 0xFEFF, then this element is skipped and it is assumed
1573  that sUTF16[] is in the CPU's native byte order.
1574 
1575  If bTestByteOrder is true and the first element of sUTF16[]
1576  is 0xFFFE, then this element is skipped and it is assumed
1577  that sUTF16[] is not in the CPU's native byte order and bytes
1578  are swapped before characters are converted.
1579 
1580  If bTestByteOrder is false or the first character of sUTF16[]
1581  is neither 0xFEFF nor 0xFFFE, then the sUTF16 string must match
1582  the CPU's byte order.
1583 
1584  sUTF16_count - [in]
1585  If sUTF16_count >= 0, then it specifies the number of
1586  ON__UINT16 elements in sUTF16[] to convert.
1587 
1588  If sUTF16_count == -1, then sUTF16 must be a null terminated
1589  string and all the elements up to the first null element are
1590  converted.
1591 
1592  sUTF8 - [out]
1593  If sUTF8 is not null and sUTF8_count > 0, then the UTF-8
1594  encoded string is returned in this buffer. If there is room
1595  for the null terminator, the converted string will be null
1596  terminated. The null terminator is never included in the count
1597  returned by this function. The converted string is in the
1598  CPU's native byte order. No byte order mark is prepended.
1599 
1600  sUTF8_count - [in]
1601  If sUTF8_count > 0, then it specifies the number of available
1602  char elements in the sUTF8[] buffer.
1603 
1604  If sUTF8_count == 0, then the sUTF8 parameter is ignored.
1605 
1606  error_status - [out]
1607  If error_status is not null, then bits of *error_status are
1608  set to indicate the success or failure of the conversion.
1609  When the error_mask parameter is used to mask some
1610  conversion errors, multiple bits may be set.
1611  0: Successful conversion with no errors.
1612  1: The input parameters were invalid.
1613  This error cannot be masked.
1614  2: The output buffer was not large enough to hold the converted
1615  string. As much conversion as possible is performed in this
1616  case and the error cannot be masked.
1617  4: When parsing a UTF-8 or UTF-32 string, the values of two
1618  consecutive encoding sequences formed a valid UTF-16
1619  surrogate pair.
1620  This error is masked if 0 != (4 & error_mask).
1621  If the error is masked, then the surrogate pair is
1622  decoded, the value of the resulting unicode code point
1623  is used, and parsing continues.
1624  8: An overlong UTF-8 encoding sequence was encountered and
1625  the value of the overlong UTF-8 sequence was a valid
1626  unicode code point.
1627  This error is masked if 0 != (8 & error_mask).
1628  If the error is masked, then the unicode code point
1629  is used and parsing continues.
1630  16: An illegal UTF-8, UTF-16 or UTF-32 encoding sequence occurred
1631  or an invalid unicode code point value resulted from decoding
1632  a UTF-8 sequence.
1633  This error is masked if 0 != (16 & error_mask).
1634  If the error is masked and the value of error_code_point is
1635  a valid unicode code point, then error_code_point is encoded
1636  in the output string and parsing continues.
1637 
1638  error_mask - [in]
1639  If 0 != (error_mask & 4), then type 4 errors are masked.
1640  If 0 != (error_mask & 8), then type 8 errors are masked.
1641  If 0 != (error_mask & 16) and error_code_point is a valid unicode
1642  code point value, then type 16 errors are masked.
1643 
1644  error_code_point - [in]
1645  Unicode code point value to use when masking type 16 errors.
1646  If 0 == (error_mask & 16), then this parameter is ignored.
1647  ON_UnicodeCodePoint::ON_ReplacementCharacter (U+FFFD)
1648  is a popular choice for the error_code_point value.
1649 
1650  sNextUTF16 - [out]
1651  If sNextUTF16 is not null, then *sNextUTF16 points to the first
1652  element in the input sUTF16[] buffer that was not converted.
1653 
1654  If an error occurs and is not masked, then *sNextUTF16 points to
1655  the element of sUTF16[] where the conversion failed. If no errors
1656  occur or all errors are masked, then *sNextUTF16 points to
1657  sUTF16 + sUTF16_count.
1658 
1659 Returns:
1660  If sUTF8_count > 0, the return value is the number of char
1661  elements written to sUTF8[]. When the return value < sUTF8_count,
1662  a null terminator is written to sUTF8[return value].
1663 
1664  If sUTF8_count == 0, the return value is the minimum number of
1665  char elements that are needed to hold the converted string.
1666  The return value does not include room for a null terminator.
1667  Increment the return value by one if you want to have an element
1668  to use for a null terminator.
1669 */
1670 ON_DECL
1671 int ON_ConvertUTF16ToUTF8( // UTF-16 -> UTF-8; returns the char count written, or the count needed when sUTF8_count == 0 (null terminator excluded)
1672  int bTestByteOrder, // [in] when true, a leading 0xFEFF / 0xFFFE byte order mark is skipped and selects the input byte order
1673  const ON__UINT16* sUTF16, // [in] UTF-16 string to convert
1674  int sUTF16_count, // [in] number of elements to convert, or -1 if sUTF16 is null terminated
1675  char* sUTF8, // [out] receives the UTF-8 string; ignored when sUTF8_count == 0
1676  int sUTF8_count, // [in] number of available char elements in sUTF8[]
1677  unsigned int* error_status, // [out] optional; bits report conversion errors (0 = success)
1678  unsigned int error_mask, // [in] bits 4, 8 and 16 mask the corresponding error types
1679  ON__UINT32 error_code_point, // [in] code point substituted when masking type 16 errors
1680  const ON__UINT16** sNextUTF16 // [out] optional; first element of sUTF16[] that was not converted
1681  );
1682 
1683 /*
1684 Description:
1685  Convert a unicode string from a UTF-16 encoded ON__UINT16 array
1686  into a UTF-16 encoded ON__UINT16 array. This is not simply
1687  a copy in the case when the input has a byte order mark (BOM),
1688  different byte ordering or contains errors. This function can
1689  be used to validate UTF-16 encoded strings.
1690 
1691 Parameters:
1692  bTestByteOrder - [in]
1693  If bTestByteOrder is true and the input buffer begins with a
1694  byte order mark (BOM), then the BOM is skipped. If the value
1695  of the BOM is byte swapped, then subsequent input elements are
1696  byte swapped before being decoded. Specifically:
1697  - If the size of an input buffer element is 1 byte and the
1698  values of the first three input elements are a UTF-8 BOM
1699  (0xEF, 0xBB, 0xBF), then the first three input elements are
1700  ignored and decoding begins at the fourth input element.
1701  - If the size of an input buffer element is 2 bytes and the value
1702  of the first element is a UTF-16 BOM (0xFEFF), then the first
1703  element is ignored and decoding begins with the second element.
1704  - If the size of an input buffer element is 2 bytes and the value
1705  of the first element is a byte swapped UTF-16 BOM (0xFFFE),
1706  then the first element is ignored, decoding begins with the
1707  second element, and input element bytes are swapped before
1708  being decoded.
1709  - If the size of an input buffer element is 4 bytes and the value
1710  of the first element is a UTF-32 BOM (0x0000FEFF), then the
1711  first element is ignored and decoding begins with the second
1712  element.
1713  - If the size of an input buffer element is 4 bytes and the value
1714  of the first element is a byte swapped UTF-32 BOM (0xFFFE0000),
1715  then the first element is ignored, decoding begins with the
1716  second element, and input element bytes are swapped before
1717  being decoded.
1718  - In all other cases the first element of the input buffer is
1719  decoded and no byte swapping is performed.
1720 
1721  sInputUTF16 - [in]
1722  UTF-16 encoded string to convert.
1723 
1724  sInputUTF16_count - [in]
1725  If sInputUTF16_count >= 0, then it specifies the number of
1726  ON__UINT16 elements in sInputUTF16[] to convert.
1727 
1728  If sInputUTF16_count == -1, then sInputUTF16 must be a null
1729  terminated array and all the elements up to the first
1730  null element are converted.
1731 
1732  sOutputUTF16 - [out]
1733  If sOutputUTF16 is not null and sOutputUTF16_count > 0, then
1734  the output UTF-16 encoded string is returned in this buffer.
1735  If there is room for the null terminator, the converted string
1736  will be null terminated. The null terminator is never included
1737  in the count returned by this function. No byte order mark is
1738  prepended.
1739 
1740  sOutputUTF16_count - [in]
1741  If sOutputUTF16_count > 0, then it specifies the number of available
1742  ON__UINT16 elements in the sOutputUTF16[] buffer.
1743 
1744  If sOutputUTF16_count == 0, then the sOutputUTF16 parameter is ignored.
1745 
1746  error_status - [out]
1747  If error_status is not null, then bits of *error_status are
1748  set to indicate the success or failure of the conversion.
1749  When the error_mask parameter is used to mask some
1750  conversion errors, multiple bits may be set.
1751  0: Successful conversion with no errors.
1752  1: The input parameters were invalid.
1753  This error cannot be masked.
1754  2: The output buffer was not large enough to hold the converted
1755  string. As much conversion as possible is performed in this
1756  case and the error cannot be masked.
1757  4: When parsing a UTF-8 or UTF-32 string, the values of two
1758  consecutive encoding sequences formed a valid UTF-16
1759  surrogate pair.
1760  This error is masked if 0 != (4 & error_mask).
1761  If the error is masked, then the surrogate pair is
1762  decoded, the value of the resulting unicode code point
1763  is used, and parsing continues.
1764  8: An overlong UTF-8 encoding sequence was encountered and
1765  the value of the overlong UTF-8 sequence was a valid
1766  unicode code point.
1767  This error is masked if 0 != (8 & error_mask).
1768  If the error is masked, then the unicode code point
1769  is used and parsing continues.
1770  16: An illegal UTF-8, UTF-16 or UTF-32 encoding sequence occurred
1771  or an invalid unicode code point value resulted from decoding
1772  a UTF-8 sequence.
1773  This error is masked if 0 != (16 & error_mask).
1774  If the error is masked and the value of error_code_point is
1775  a valid unicode code point, then error_code_point is encoded
1776  in the output string and parsing continues.
1777 
1778  error_mask - [in]
1779  If 0 != (error_mask & 4), then type 4 errors are masked.
1780  If 0 != (error_mask & 8), then type 8 errors are masked.
1781  If 0 != (error_mask & 16) and error_code_point is a valid unicode
1782  code point value, then type 16 errors are masked.
1783 
1784  error_code_point - [in]
1785  Unicode code point value to use when masking type 16 errors.
1786  If 0 == (error_mask & 16), then this parameter is ignored.
1787  ON_UnicodeCodePoint::ON_ReplacementCharacter (U+FFFD)
1788  is a popular choice for the error_code_point value.
1789 
1790  sNextInputUTF16 - [out]
1791  If sNextInputUTF16 is not null, then *sNextInputUTF16 points to
1792  the first element in the input sInputUTF16[] buffer that was not
1793  converted.
1794 
1795  If an error occurs and is not masked, then *sNextInputUTF16 points
1796  to the element of sInputUTF16[] where the conversion failed.
1797  If no errors occur or all errors are masked, then
1798  *sNextInputUTF16 points to sInputUTF16 + sInputUTF16_count.
1799 
1800 Returns:
1801  If sOutputUTF16_count > 0, the return value is the number of ON__UINT16
1802  elements written to sOutputUTF16[]. When the return value < sOutputUTF16_count,
1803  a null terminator is written to sOutputUTF16[return value].
1804 
1805  If sOutputUTF16_count == 0, the return value is the minimum number of
1806  ON__UINT16 elements that are needed to hold the converted string.
1807  The return value does not include room for a null terminator.
1808  Increment the return value by one if you want to have an element
1809  to use for a null terminator.
1810 */
1811 ON_DECL
1812 int ON_ConvertUTF16ToUTF16( // UTF-16 -> UTF-16 (BOM removal, byte order fix, validation); returns the ON__UINT16 count written, or the count needed when sOutputUTF16_count == 0
1813  int bTestByteOrder, // [in] when true, a leading BOM is skipped and a byte swapped BOM causes input elements to be byte swapped
1814  const ON__UINT16* sInputUTF16, // [in] UTF-16 encoded string to convert
1815  int sInputUTF16_count, // [in] number of elements to convert, or -1 if sInputUTF16 is null terminated
1816  ON__UINT16* sOutputUTF16, // [out] receives the UTF-16 string; ignored when sOutputUTF16_count == 0
1817  int sOutputUTF16_count, // [in] number of available ON__UINT16 elements in sOutputUTF16[]
1818  unsigned int* error_status, // [out] optional; bits report conversion errors (0 = success)
1819  unsigned int error_mask, // [in] bits 4, 8 and 16 mask the corresponding error types
1820  ON__UINT32 error_code_point, // [in] code point substituted when masking type 16 errors
1821  const ON__UINT16** sNextInputUTF16 // [out] optional; first element of sInputUTF16[] that was not converted
1822  );
1823 
1824 /*
1825 Description:
1826  Convert a unicode string from a UTF-16 encoded ON__UINT16 array
1827  into a UTF-32 encoded ON__UINT32 array.
1828 
1829 Parameters:
1830  bTestByteOrder - [in]
1831  If bTestByteOrder is true and the input buffer begins with a
1832  byte order mark (BOM), then the BOM is skipped. If the value
1833  of the BOM is byte swapped, then subsequent input elements are
1834  byte swapped before being decoded. Specifically:
1835  - If the size of an input buffer element is 1 byte and the
1836  values of the first three input elements are a UTF-8 BOM
1837  (0xEF, 0xBB, 0xBF), then the first three input elements are
1838  ignored and decoding begins at the fourth input element.
1839  - If the size of an input buffer element is 2 bytes and the value
1840  of the first element is a UTF-16 BOM (0xFEFF), then the first
1841  element is ignored and decoding begins with the second element.
1842  - If the size of an input buffer element is 2 bytes and the value
1843  of the first element is a byte swapped UTF-16 BOM (0xFFFE),
1844  then the first element is ignored, decoding begins with the
1845  second element, and input element bytes are swapped before
1846  being decoded.
1847  - If the size of an input buffer element is 4 bytes and the value
1848  of the first element is a UTF-32 BOM (0x0000FEFF), then the
1849  first element is ignored and decoding begins with the second
1850  element.
1851  - If the size of an input buffer element is 4 bytes and the value
1852  of the first element is a byte swapped UTF-32 BOM (0xFFFE0000),
1853  then the first element is ignored, decoding begins with the
1854  second element, and input element bytes are swapped before
1855  being decoded.
1856  - In all other cases the first element of the input buffer is
1857  decoded and no byte swapping is performed.
1858 
1859  sUTF16 - [in]
1860  UTF-16 string to convert.
1861 
1862  If bTestByteOrder is true and the first element of sUTF16[]
1863  is 0xFEFF, then this element is skipped and it is assumed
1864  that sUTF16[] is in the CPU's native byte order.
1865 
1866  If bTestByteOrder is true and the first element of sUTF16[]
1867  is 0xFFFE, then this element is skipped and it is assumed
1868  that sUTF16[] is not in the CPU's native byte order and bytes
1869  are swapped before characters are converted.
1870 
1871  If bTestByteOrder is false or the first character of sUTF16[]
1872  is neither 0xFEFF nor 0xFFFE, then the sUTF16 string must match
1873  the CPU's byte order.
1874 
1875  sUTF16_count - [in]
1876  If sUTF16_count >= 0, then it specifies the number of
1877  ON__UINT16 elements in sUTF16[] to convert.
1878 
1879  If sUTF16_count == -1, then sUTF16 must be a null terminated
1880  string and all the elements up to the first null element are
1881  converted.
1882 
1883  sUTF32 - [out]
1884  If sUTF32 is not null and sUTF32_count > 0, then the UTF-32
1885  encoded string is returned in this buffer. If there is room
1886  for the null terminator, the converted string will be null
1887  terminated. The null terminator is never included in the count
1888  returned by this function. The converted string is in the
1889  CPU's native byte order. No byte order mark is prepended.
1890 
1891  sUTF32_count - [in]
1892  If sUTF32_count > 0, then it specifies the number of available
1893  ON__UINT32 elements in the sUTF32[] buffer.
1894 
1895  If sUTF32_count == 0, then the sUTF32 parameter is ignored.
1896 
1897  error_status - [out]
1898  If error_status is not null, then bits of *error_status are
1899  set to indicate the success or failure of the conversion.
1900  When the error_mask parameter is used to mask some
1901  conversion errors, multiple bits may be set.
1902  0: Successful conversion with no errors.
1903  1: The input parameters were invalid.
1904  This error cannot be masked.
1905  2: The output buffer was not large enough to hold the converted
1906  string. As much conversion as possible is performed in this
1907  case and the error cannot be masked.
1908  4: When parsing a UTF-8 or UTF-32 string, the values of two
1909  consecutive encoding sequences formed a valid UTF-16
1910  surrogate pair.
1911  This error is masked if 0 != (4 & error_mask).
1912  If the error is masked, then the surrogate pair is
1913  decoded, the value of the resulting unicode code point
1914  is used, and parsing continues.
1915  8: An overlong UTF-8 encoding sequence was encountered and
1916  the value of the overlong UTF-8 sequence was a valid
1917  unicode code point.
1918  This error is masked if 0 != (8 & error_mask).
1919  If the error is masked, then the unicode code point
1920  is used and parsing continues.
1921  16: An illegal UTF-8, UTF-16 or UTF-32 encoding sequence occurred
1922  or an invalid unicode code point value resulted from decoding
1923  a UTF-8 sequence.
1924  This error is masked if 0 != (16 & error_mask).
1925  If the error is masked and the value of error_code_point is
1926  a valid unicode code point, then error_code_point is encoded
1927  in the output string and parsing continues.
1928 
1929  error_mask - [in]
1930  If 0 != (error_mask & 4), then type 4 errors are masked.
1931  If 0 != (error_mask & 8), then type 8 errors are masked.
1932  If 0 != (error_mask & 16) and error_code_point is a valid unicode
1933  code point value, then type 16 errors are masked.
1934 
1935  error_code_point - [in]
1936  Unicode code point value to use when masking type 16 errors.
1937  If 0 == (error_mask & 16), then this parameter is ignored.
1938  ON_UnicodeCodePoint::ON_ReplacementCharacter (U+FFFD)
1939  is a popular choice for the error_code_point value.
1940 
1941  sNextUTF16 - [out]
1942  If sNextUTF16 is not null, then *sNextUTF16 points to the first
1943  element in the input sUTF16[] buffer that was not converted.
1944 
1945  If an error occurs and is not masked, then *sNextUTF16 points to
1946  the element of sUTF16[] where the conversion failed. If no errors
1947  occur or all errors are masked, then *sNextUTF16 points to
1948  sUTF16 + sUTF16_count.
1949 
1950 Returns:
1951  If sUTF32_count > 0, the return value is the number of ON__UINT32
1952  elements written to sUTF32[]. When the return value < sUTF32_count,
1953  a null terminator is written to sUTF32[return value].
1954 
1955  If sUTF32_count == 0, the return value is the minimum number of
1956  ON__UINT32 elements that are needed to hold the converted string.
1957  The return value does not include room for a null terminator.
1958  Increment the return value by one if you want to have an element
1959  to use for a null terminator.
1960 */
1961 ON_DECL
1962 int ON_ConvertUTF16ToUTF32( // UTF-16 -> UTF-32; returns the element count written, or the count needed when sUTF32_count == 0 (null terminator excluded)
1963  int bTestByteOrder, // [in] when true, a leading 0xFEFF / 0xFFFE byte order mark is skipped and selects the input byte order
1964  const ON__UINT16* sUTF16, // [in] UTF-16 string to convert
1965  int sUTF16_count, // [in] number of elements to convert, or -1 if sUTF16 is null terminated
1966  unsigned int* sUTF32, // [out] receives the UTF-32 string; ignored when sUTF32_count == 0. NOTE(review): documented as ON__UINT32 elements but declared unsigned int* - confirm the types are identical
1967  int sUTF32_count, // [in] number of available elements in sUTF32[]
1968  unsigned int* error_status, // [out] optional; bits report conversion errors (0 = success)
1969  unsigned int error_mask, // [in] bits 4, 8 and 16 mask the corresponding error types
1970  ON__UINT32 error_code_point, // [in] code point substituted when masking type 16 errors
1971  const ON__UINT16** sNextUTF16 // [out] optional; first element of sUTF16[] that was not converted
1972  );
1973 
1974 /*
1975 Description:
1976  Convert a unicode string from a UTF-32 encoded ON__UINT32 array
1977  into a UTF-8 encoded char array.
1978 
1979 Parameters:
1980  bTestByteOrder - [in]
1981  If bTestByteOrder is true and the input buffer begins with a
1982  byte order mark (BOM), then the BOM is skipped. If the value
1983  of the BOM is byte swapped, then subsequent input elements are
1984  byte swapped before being decoded. Specifically:
1985  - If the size of an input buffer element is 1 byte and the
1986  values of the first three input elements are a UTF-8 BOM
1987  (0xEF, 0xBB, 0xBF), then the first three input elements are
1988  ignored and decoding begins at the fourth input element.
1989  - If the size of an input buffer element is 2 bytes and the value
1990  of the first element is a UTF-16 BOM (0xFEFF), then the first
1991  element is ignored and decoding begins with the second element.
1992  - If the size of an input buffer element is 2 bytes and the value
1993  of the first element is a byte swapped UTF-16 BOM (0xFFFE),
1994  then the first element is ignored, decoding begins with the
1995  second element, and input element bytes are swapped before
1996  being decoded.
1997  - If the size of an input buffer element is 4 bytes and the value
1998  of the first element is a UTF-32 BOM (0x0000FEFF), then the
1999  first element is ignored and decoding begins with the second
2000  element.
2001  - If the size of an input buffer element is 4 bytes and the value
2002  of the first element is a byte swapped UTF-32 BOM (0xFFFE0000),
2003  then the first element is ignored, decoding begins with the
2004  second element, and input element bytes are swapped before
2005  being decoded.
2006  - In all other cases the first element of the input buffer is
2007  decoded and no byte swapping is performed.
2008 
2009  sUTF32 - [in]
2010  UTF-32 string to convert.
2011 
2012  If bTestByteOrder is true and the first element of sUTF32[]
2013  is 0x0000FEFF, then this element is skipped and it is assumed
2014  that sUTF32[] is in the CPU's native byte order.
2015 
2016  If bTestByteOrder is true and the first element of sUTF32[]
2017  is 0xFFFE0000, then this element is skipped and it is assumed
2018  that sUTF32[] is not in the CPU's native byte order and bytes
2019  are swapped before characters are converted.
2020 
2021  If bTestByteOrder is false or the first character of sUTF32[]
2022  is neither 0x0000FEFF nor 0xFFFE0000, then the sUTF32 string
2023  must match the CPU's byte order.
2024 
2025  sUTF32_count - [in]
2026  If sUTF32_count >= 0, then it specifies the number of
2027  ON__UINT32 elements in sUTF32[] to convert.
2028 
2029  If sUTF32_count == -1, then sUTF32 must be a null terminated
2030  string and all the elements up to the first null element are
2031  converted.
2032 
2033  sUTF8 - [out]
2034  If sUTF8 is not null and sUTF8_count > 0, then the UTF-8
2035  encoded string is returned in this buffer. If there is room
2036  for the null terminator, the converted string will be null
2037  terminated. The null terminator is never included in the count
2038  returned by this function. The converted string is in the
2039  CPU's native byte order. No byte order mark is prepended.
2040 
2041  sUTF8_count - [in]
2042  If sUTF8_count > 0, then it specifies the number of available
2043  char elements in the sUTF8[] buffer.
2044 
2045  If sUTF8_count == 0, then the sUTF8 parameter is ignored.
2046 
2047  error_status - [out]
2048  If error_status is not null, then bits of *error_status are
2049  set to indicate the success or failure of the conversion.
2050  When the error_mask parameter is used to mask some
2051  conversion errors, multiple bits may be set.
2052  0: Successful conversion with no errors.
2053  1: The input parameters were invalid.
2054  This error cannot be masked.
2055  2: The output buffer was not large enough to hold the converted
2056  string. As much conversion as possible is performed in this
2057  case and the error cannot be masked.
2058  4: When parsing a UTF-8 or UTF-32 string, the values of two
2059  consecutive encoding sequences formed a valid UTF-16
2060  surrogate pair.
2061  This error is masked if 0 != (4 & error_mask).
2062  If the error is masked, then the surrogate pair is
2063  decoded, the value of the resulting unicode code point
2064  is used, and parsing continues.
2065  8: An overlong UTF-8 encoding sequence was encountered and
2066  the value of the overlong UTF-8 sequence was a valid
2067  unicode code point.
2068  This error is masked if 0 != (8 & error_mask).
2069  If the error is masked, then the unicode code point
2070  is used and parsing continues.
2071  16: An illegal UTF-8, UTF-16 or UTF-32 encoding sequence occurred
2072  or an invalid unicode code point value resulted from decoding
2073  a UTF-8 sequence.
2074  This error is masked if 0 != (16 & error_mask).
2075  If the error is masked and the value of error_code_point is
2076  a valid unicode code point, then error_code_point is encoded
2077  in the output string and parsing continues.
2078 
2079  error_mask - [in]
2080  If 0 != (error_mask & 4), then type 4 errors are masked.
2081  If 0 != (error_mask & 8), then type 8 errors are masked.
2082  If 0 != (error_mask & 16) and error_code_point is a valid unicode
2083  code point value, then type 16 errors are masked.
2084 
2085  error_code_point - [in]
2086  Unicode code point value to use when masking type 16 errors.
2087  If 0 == (error_mask & 16), then this parameter is ignored.
2088  ON_UnicodeCodePoint::ON_ReplacementCharacter (U+FFFD)
2089  is a popular choice for the error_code_point value.
2090 
2091  sNextUTF32 - [out]
2092  If sNextUTF32 is not null, then *sNextUTF32 points to the first
2093  element in the input sUTF32[] buffer that was not converted.
2094 
2095  If an error occurs and is not masked, then *sNextUTF32 points to
2096  the element of sUTF32[] where the conversion failed. If no errors
2097  occur or all errors are masked, then *sNextUTF32 points to
2098  sUTF32 + sUTF32_count.
2099 
2100 Returns:
2101  If sUTF8_count > 0, the return value is the number of char
2102  elements written to sUTF8[]. When the return value < sUTF8_count,
2103  a null terminator is written to sUTF8[return value].
2104 
2105  If sUTF8_count == 0, the return value is the minimum number of
2106  char elements that are needed to hold the converted string.
2107  The return value does not include room for a null terminator.
2108  Increment the return value by one if you want to have an element
2109  to use for a null terminator.
2110 */
2111 ON_DECL
2112 int ON_ConvertUTF32ToUTF8( // UTF-32 -> UTF-8; returns the char count written, or the count needed when sUTF8_count == 0 (null terminator excluded)
2113  int bTestByteOrder, // [in] when true, a leading 0x0000FEFF / 0xFFFE0000 byte order mark is skipped and selects the input byte order
2114  const ON__UINT32* sUTF32, // [in] UTF-32 string to convert
2115  int sUTF32_count, // [in] number of elements to convert, or -1 if sUTF32 is null terminated
2116  char* sUTF8, // [out] receives the UTF-8 string; ignored when sUTF8_count == 0
2117  int sUTF8_count, // [in] number of available char elements in sUTF8[]
2118  unsigned int* error_status, // [out] optional; bits report conversion errors (0 = success)
2119  unsigned int error_mask, // [in] bits 4, 8 and 16 mask the corresponding error types
2120  ON__UINT32 error_code_point, // [in] code point substituted when masking type 16 errors
2121  const ON__UINT32** sNextUTF32 // [out] optional; first element of sUTF32[] that was not converted
2122  );
2123 
2124 /*
2125 Description:
2126  Convert a unicode string from a UTF-32 encoded ON__UINT32 array
2127  into a UTF-16 encoded ON__UINT16 array.
2128 
2129 Parameters:
2130  bTestByteOrder - [in]
2131  If bTestByteOrder is true and the input buffer begins with a
2132  byte order mark (BOM), then the BOM is skipped. If the value
2133  of the BOM is byte swapped, then subsequent input elements are
2134  byte swapped before being decoded. Specifically:
2135  - If the size of an input buffer element is 1 byte and the
2136  values of the first three input elements are a UTF-8 BOM
2137  (0xEF, 0xBB, 0xBF), then the first three input elements are
2138  ignored and decoding begins at the fourth input element.
2139  - If the size of an input buffer element is 2 bytes and the value
2140  of the first element is a UTF-16 BOM (0xFEFF), then the first
2141  element is ignored and decoding begins with the second element.
2142  - If the size of an input buffer element is 2 bytes and the value
2143  of the first element is a byte swapped UTF-16 BOM (0xFFFE),
2144  then the first element is ignored, decoding begins with the
2145  second element, and input element bytes are swapped before
2146  being decoded.
2147  - If the size of an input buffer element is 4 bytes and the value
2148  of the first element is a UTF-32 BOM (0x0000FEFF), then the
2149  first element is ignored and decoding begins with the second
2150  element.
2151  - If the size of an input buffer element is 4 bytes and the value
2152  of the first element is a byte swapped UTF-32 BOM (0xFFFE0000),
2153  then the first element is ignored, decoding begins with the
2154  second element, and input element bytes are swapped before
2155  being decoded.
2156  - In all other cases the first element of the input buffer is
2157  decoded and no byte swapping is performed.
2158 
2159  sUTF32 - [in]
2160  UTF-32 string to convert.
2161 
2162  If bTestByteOrder is true and the first element of sUTF32[]
2163  is 0x0000FEFF, then this element is skipped and it is assumed
2164  that sUTF32[] is in the CPU's native byte order.
2165 
2166  If bTestByteOrder is true and the first element of sUTF32[]
2167  is 0xFFFE0000, then this element is skipped and it is assumed
2168  that sUTF32[] is not in the CPU's native byte order and bytes
2169  are swapped before characters are converted.
2170 
2171  If bTestByteOrder is false or the first character of sUTF32[]
2172  is neither 0x0000FEFF nor 0xFFFE0000, then the sUTF32 string
2173  must match the CPU's byte order.
2174 
2175  sUTF32_count - [in]
2176  If sUTF32_count >= 0, then it specifies the number of
2177  ON__UINT32 elements in sUTF32[] to convert.
2178 
2179  If sUTF32_count == -1, then sUTF32 must be a null terminated
2180  string and all the elements up to the first null element are
2181  converted.
2182 
2183  sUTF16 - [out]
2184  If sUTF16 is not null and sUTF16_count > 0, then the UTF-16
2185  encoded string is returned in this buffer. If there is room
2186  for the null terminator, the converted string will be null
2187  terminated. The null terminator is never included in the count
2188  returned by this function. The converted string is in the
2189  CPU's native byte order. No byte order mark is prepended.
2190 
2191  sUTF16_count - [in]
2192  If sUTF16_count > 0, then it specifies the number of available
2193  ON__UINT16 elements in the sUTF16[] buffer.
2194 
2195  If sUTF16_count == 0, then the sUTF16 parameter is ignored.
2196 
2197  error_status - [out]
2198  If error_status is not null, then bits of *error_status are
2199  set to indicate the success or failure of the conversion.
2200  When the error_mask parameter is used to mask some
2201  conversion errors, multiple bits may be set.
2202  0: Successful conversion with no errors.
2203  1: The input parameters were invalid.
2204  This error cannot be masked.
2205  2: The output buffer was not large enough to hold the converted
2206  string. As much conversion as possible is performed in this
2207  case and the error cannot be masked.
2208  4: When parsing a UTF-8 or UTF-32 string, the values of two
2209  consecutive encoding sequences formed a valid UTF-16
2210  surrogate pair.
2211  This error is masked if 0 != (4 & error_mask).
2212  If the error is masked, then the surrogate pair is
2213  decoded, the value of the resulting unicode code point
2214  is used, and parsing continues.
2215  8: An overlong UTF-8 encoding sequence was encountered and
2216  the value of the overlong UTF-8 sequence was a valid
2217  unicode code point.
2218  This error is masked if 0 != (8 & error_mask).
2219  If the error is masked, then the unicode code point
2220  is used and parsing continues.
2221  16: An illegal UTF-8, UTF-16 or UTF-32 encoding sequence occurred
2222  or an invalid unicode code point value resulted from decoding
2223  a UTF-8 sequence.
2224  This error is masked if 0 != (16 & error_mask).
2225  If the error is masked and the value of error_code_point is
2226  a valid unicode code point, then error_code_point is encoded
2227  in the output string and parsing continues.
2228 
2229  error_mask - [in]
2230  If 0 != (error_mask & 4), then type 4 errors are masked.
2231  If 0 != (error_mask & 8), then type 8 errors are masked.
2232  If 0 != (error_mask & 16) and error_code_point is a valid unicode
2233  code point value, then type 16 errors are masked.
2234 
2235  error_code_point - [in]
2236  Unicode code point value to use when masking type 16 errors.
2237  If 0 == (error_mask & 16), then this parameter is ignored.
2238  ON_UnicodeCodePoint::ON_ReplacementCharacter (U+FFFD)
2239  is a popular choice for the error_code_point value.
2240 
2241  sNextUTF32 - [out]
2242  If sNextUTF32 is not null, then *sNextUTF32 points to the first
2243  element in the input sUTF32[] buffer that was not converted.
2244 
2245  If an error occurs and is not masked, then the ON__UINT32 element
2246  at *sNextUTF32 will be an illegal unicode code point value.
2247 
2248  If an error does not occur, then (*sNextUTF32 - sUTF32)
2249  is the number of values converted.
2250 
2251 Returns:
2252  If sUTF16_count > 0, the return value is the number of ON__UINT16
2253  elements written to sUTF16[]. When the return value < sUTF16_count,
2254  a null terminator is written to sUTF16[return value].
2255 
2256  If sUTF16_count == 0, the return value is the minimum number of
2257  ON__UINT16 elements that are needed to hold the converted string.
2258  The return value does not include room for a null terminator.
2259  Increment the return value by one if you want to have an element
2260  to use for a null terminator.
2261 */
2262 ON_DECL
2263 int ON_ConvertUTF32ToUTF16( // UTF-32 -> UTF-16; returns the ON__UINT16 count written, or the count needed when sUTF16_count == 0 (null terminator excluded)
2264  int bTestByteOrder, // [in] when true, a leading 0x0000FEFF / 0xFFFE0000 byte order mark is skipped and selects the input byte order
2265  const ON__UINT32* sUTF32, // [in] UTF-32 string to convert
2266  int sUTF32_count, // [in] number of elements to convert, or -1 if sUTF32 is null terminated
2267  ON__UINT16* sUTF16, // [out] receives the UTF-16 string; ignored when sUTF16_count == 0
2268  int sUTF16_count, // [in] number of available ON__UINT16 elements in sUTF16[]
2269  unsigned int* error_status, // [out] optional; bits report conversion errors (0 = success)
2270  unsigned int error_mask, // [in] bits 4, 8 and 16 mask the corresponding error types
2271  ON__UINT32 error_code_point, // [in] code point substituted when masking type 16 errors
2272  const ON__UINT32** sNextUTF32 // [out] optional; first element of sUTF32[] that was not converted
2273  );
2274 
2275 /*
2276 Description:
2277  Convert a unicode string from a UTF-32 encoded ON__UINT32 array
2278  into a UTF-32 encoded ON__UINT32 array. This is not simply
2279  a copy in the case when the input has a byte order mark (BOM),
2280  different byte ordering or contains errors. This function can
2281  be used to validate UTF-32 encoded strings.
2282 
2283 Parameters:
2284  bTestByteOrder - [in]
2285  If bTestByteOrder is true and the input buffer begins with a
2286  byte order mark (BOM), then the BOM is skipped. If the value
2287  of the BOM is byte swapped, then subsequent input elements are
2288  byte swapped before being decoded. Specifically:
2289  - If the size of an input buffer element is 1 byte and the
2290  values of the first three input elements are a UTF-8 BOM
2291  (0xEF, 0xBB, 0xBF), then the first three input elements are
2292  ignored and decoding begins at the fourth input element.
2293  - If the size of an input buffer element is 2 bytes and the value
2294  of the first element is a UTF-16 BOM (0xFEFF), then the first
2295  element is ignored and decoding begins with the second element.
2296  - If the size of an input buffer element is 2 bytes and the value
2297  of the first element is a byte swapped UTF-16 BOM (0xFFFE),
2298  then the first element is ignored, decoding begins with the
2299  second element, and input element bytes are swapped before
2300  being decoded.
2301  - If the size of an input buffer element is 4 bytes and the value
2302  of the first element is a UTF-32 BOM (0x0000FEFF), then the
2303  first element is ignored and decoding begins with the second
2304  element.
2305  - If the size of an input buffer element is 4 bytes and the value
2306  of the first element is a byte swapped UTF-32 BOM (0xFFFE0000),
2307  then the first element is ignored, decoding begins with the
2308  second element, and input element bytes are swapped before
2309  being decoded.
2310  - In all other cases the first element of the input buffer is
2311  decoded and no byte swapping is performed.
2312 
2313  sInputUTF32 - [in]
2314  UTF-32 string to convert.
2315 
2316  If bTestByteOrder is true and the first element of sInputUTF32[]
2317  is 0x0000FEFF, then this element is skipped and it is assumed
2318  that sInputUTF32[] is in the CPU's native byte order.
2319 
2320  If bTestByteOrder is true and the first element of sInputUTF32[]
2321  is 0xFFFE0000, then this element is skipped and it is assumed
2322  that sInputUTF32[] is not in the CPU's native byte order and bytes
2323  are swapped before characters are converted.
2324 
2325  If bTestByteOrder is false or the first character of sUTF32[]
2326  is neither 0x0000FEFF nor 0xFFFE0000, then the sUTF32 string
2327  must match the CPU's byte order.
2328 
2329  sInputUTF32_count - [in]
2330  If sInputUTF32_count >= 0, then it specifies the number of
2331  ON__UINT32 elements in sInputUTF32[] to convert.
2332 
2333  If sInputUTF32_count == -1, then sInputUTF32 must be a null
2334  terminated string and all the elements up to the first null
2335  element are converted.
2336 
2337  sOutputUTF32 - [out]
2338  If sOutputUTF32 is not null and sOutputUTF32_count > 0, then
2339  the UTF-32 encoded string is returned in this buffer. If there
2340  is room for the null terminator, the converted string will be null
2341  terminated. The null terminator is never included in the count
2342  of returned by this function. The converted string is in the
2343  CPU's native byte order. No byte order mark is prepended.
2344 
2345  sOutputUTF32_count - [in]
2346  If sOutputUTF32_count > 0, then it specifies the number of available
2347  ON__UINT32 elements in the sOutputUTF32[] buffer.
2348 
2349  If sOutputUTF32_count == 0, then the sOutputUTF32 parameter
2350  is ignored. This is useful when you want to validate a UTF-32
2351  formatted string.
2352 
2353  error_status - [out]
2354  If error_status is not null, then bits of *error_status are
2355  set to indicate the success or failure of the conversion.
2356  When the error_mask parameter is used to used to mask some
2357  conversion errors, multiple bits may be set.
2358  0: Successful conversion with no errors.
2359  1: The input parameters were invalid.
2360  This error cannot be masked.
2361  2: The ouput buffer was not large enough to hold the converted
2362  string. As much conversion as possible is performed in this
2363  case and the error cannot be masked.
2364  4: When parsing a UTF-8 or UTF-32 string, the values of two
2365  consecutive encoding sequences formed a valid UTF-16
2366  surrogate pair.
2367  This error is masked if 0 != (4 & m_error_mask).
2368  If the error is masked, then the surrogate pair is
2369  decoded, the value of the resulting unicode code point
2370  is used, and parsing continues.
2371  8: An overlong UTF-8 encoding sequence was encountered and
2372  the value of the overlong sUTF-8 equence was a valid
2373  unicode code point.
2374  This error is masked if 0 != (8 & m_error_mask).
2375  If the error is masked, then the unicode code point
2376  is used and parsing continues.
2377  16: An illegal UTF-8, UTF-16 or UTF-32 encoding sequence occured
2378  or an invalid unicode code point value resulted from decoding
2379  a UTF-8 sequence.
2380  This error is masked if 0 != (16 & m_error_mask).
2381  If the error is masked and the value of error_code_point is
2382  a valid unicode code point, then error_code_point is encoded
2383  in the output string and parsing continues.
2384 
2385  error_mask - [in]
2386  If 0 != (error_mask & 4), then type 4 errors are masked.
2387  If 0 != (error_mask & 8), then type 8 errors are masked.
2388  If 0 != (error_mask & 16) and error_code_point is a valid unicode
2389  code point value, then type 16 errors are masked.
2390 
2391  error_code_point - [in]
2392  Unicode code point value to use in when masking type 16 errors.
2393  If 0 == (error_mask & 16), then this parameter is ignored.
2394  ON_UnicodeCodePoint::ON_ReplacementCharacter (U+FFFD)
2395  is a popular choice for the error_code_point value.
2396 
2397  sNextInputUTF32 - [out]
2398  If sNextInputUTF32 is not null, then *sNextInputUTF32 points to
2399  the first element in the input sInputUTF32[] buffer that was not
2400  converted.
2401 
2402  If an error occurs and is not masked, then this unsigned int
2403  will be an illegal unicode code point value.
2404 
2405  If an error does not occur, then (*sNextInputUTF32 - sInputUTF32)
2406  is the number of values converted.
2407 
2408 Returns:
2409  If sOutputUTF32_count > 0, the return value is the number of ON__UINT32
2410  elements written to sOutputUTF32[].
2411  When the return value < sOutputUTF32_count,
2412  a null terminator is written to sOutputUTF32[return value].
2413 
2414  If sOutputUTF32_count == 0, the return value is the minimum number of
2415  ON__UINT32 elements that are needed to hold the converted string.
2416  The return value does not include room for a null terminator.
2417  Increment the return value by one if you want to have an element
2418  to use for a null terminator.
2419 */
2420 ON_DECL
2421 int ON_ConvertUTF32ToUTF32(
2422  int bTestByteOrder,
2423  const ON__UINT32* sInputUTF32,
2424  int sInputUTF32_count,
2425  ON__UINT32* sOuputUTF32,
2426  int sOutputUTF32_count,
2427  unsigned int* error_status,
2428  unsigned int error_mask,
2429  ON__UINT32 error_code_point,
2430  const ON__UINT32** sNextInputUTF32
2431  );
2432 
2433 /*
2434 Description:
2435  Convert a wchar_t string using the native platform's most common
2436  encoding into a unicode string encoded as a UTF-8 char array.
2437 
2438  If 1 = sizeof(wchar_t), then the wchar_t array is assumed to be
2439  a UTF-8 encoded string.
2440 
2441  If 2 = sizeof(wchar_t), then the wchar_t array is assumed to be
2442  a UTF-16 encoded string. This is the case with current versions
2443  of Microsoft Windows.
2444 
2445  If 4 = sizeof(wchar_t), then the wchar_t array is assumed to be
2446  a UTF-32 encoded string. This is the case with current versions
2447  of Apple OSX.
2448 
2449 Parameters:
2450  bTestByteOrder - [in]
2451  If bTestByteOrder is true and the input buffer begins with a
2452  byte order mark (BOM), then the BOM is skipped. If the value
2453  of the BOM is byte swapped, then subsequent input elements are
2454  byte swapped before being decoded. Specifically:
2455  - If the size of an input buffer element is 1 byte and the
2456  values of the first three input elements are a UTF-8 BOM
2457  (0xEF, 0xBB, 0xBF), then the first three input elements are
2458  ignored and decoding begins at the fourth input element.
2459  - If the size of an input buffer element is 2 bytes and the value
2460  of the first element is a UTF-16 BOM (0xFEFF), then the first
2461  element is ignored and decoding begins with the second element.
2462  - If the size of an input buffer element is 2 bytes and the value
2463  of the first element is a byte swapped UTF-16 BOM (0xFFFE),
2464  then the first element is ignored, decoding begins with the
2465  second element, and input element bytes are swapped before
2466  being decoded.
2467  - If the size of an input buffer element is 4 bytes and the value
2468  of the first element is a UTF-32 BOM (0x0000FEFF), then the
2469  first element is ignored and decoding begins with the second
2470  element.
2471  - If the size of an input buffer element is 4 bytes and the value
2472  of the first element is a byte swapped UTF-32 BOM (0xFFFE0000),
2473  then the first element is ignored, decoding begins with the
2474  second element, and input element bytes are swapped before
2475  being decoded.
2476  - In all other cases the first element of the input buffer is
2477  decoded and no byte swapping is performed.
2478 
2479  sWideChar - [in]
2480  wchar_t input string to convert.
2481 
2482  sWideChar_count - [in]
2483  If sWideChar_count >= 0, then it specifies the number of
2484  wchar_t elements in sWideChar[] to convert.
2485 
2486  If sWideChar_count == -1, then sWideChar must be a null terminated
2487  array and all the elements up to the first null element are
2488  converted.
2489 
2490  sUTF8 - [out]
2491  If sUTF8 is not null and sUTF8_count > 0, then the UTF-8
2492  encoded string is returned in this buffer. If there is room
2493  for the null terminator, the converted string will be null
2494  terminated. The null terminator is never included in the count
2495  returned by this function. The converted string is in the
2496  CPU's native byte order. No byte order mark is prepended.
2497 
2498  sUTF8_count - [in]
2499  If sUTF8_count > 0, then it specifies the number of available
2500  char elements in the sUTF8[] buffer.
2501 
2502  If sUTF8_count == 0, then the sUTF8 parameter is ignored.
2503 
2504  error_status - [out]
2505  If error_status is not null, then bits of *error_status are
2506  set to indicate the success or failure of the conversion.
2507  When the error_mask parameter is used to mask some
2508  conversion errors, multiple bits may be set.
2509  0: Successful conversion with no errors.
2510  1: The input parameters were invalid.
2511  This error cannot be masked.
2512  2: The output buffer was not large enough to hold the converted
2513  string. As much conversion as possible is performed in this
2514  case and the error cannot be masked.
2515  4: When parsing a UTF-8 or UTF-32 string, the values of two
2516  consecutive encoding sequences formed a valid UTF-16
2517  surrogate pair.
2518  This error is masked if 0 != (4 & error_mask).
2519  If the error is masked, then the surrogate pair is
2520  decoded, the value of the resulting unicode code point
2521  is used, and parsing continues.
2522  8: An overlong UTF-8 encoding sequence was encountered and
2523  the value of the overlong UTF-8 sequence was a valid
2524  unicode code point.
2525  This error is masked if 0 != (8 & error_mask).
2526  If the error is masked, then the unicode code point
2527  is used and parsing continues.
2528  16: An illegal UTF-8, UTF-16 or UTF-32 encoding sequence occurred
2529  or an invalid unicode code point value resulted from decoding
2530  a UTF-8 sequence.
2531  This error is masked if 0 != (16 & error_mask).
2532  If the error is masked and the value of error_code_point is
2533  a valid unicode code point, then error_code_point is encoded
2534  in the output string and parsing continues.
2535 
2536  error_mask - [in]
2537  If 0 != (error_mask & 4), then type 4 errors are masked.
2538  If 0 != (error_mask & 8), then type 8 errors are masked.
2539  If 0 != (error_mask & 16) and error_code_point is a valid unicode
2540  code point value, then type 16 errors are masked.
2541 
2542  error_code_point - [in]
2543  Unicode code point value to use when masking type 16 errors.
2544  If 0 == (error_mask & 16), then this parameter is ignored.
2545  ON_UnicodeCodePoint::ON_ReplacementCharacter (U+FFFD)
2546  is a popular choice for the error_code_point value.
2547 
2548  sNextWideChar - [out]
2549  If sNextWideChar is not null, then *sNextWideChar points to the first
2550  element in the input sWideChar[] buffer that was not converted.
2551 
2552  If an error occurs and is not masked, then *sNextWideChar points to
2553  the element of sWideChar[] where the conversion failed. If no errors
2554  occur or all errors are masked, then *sNextWideChar points to
2555  sWideChar + sWideChar_count.
2556 Returns:
2557  If sUTF8_count > 0, the return value is the number of char
2558  elements written to sUTF8[]. When the return value < sUTF8_count,
2559  a null terminator is written to sUTF8[return value].
2560 
2561  If sUTF8_count == 0, the return value is the minimum number of
2562  char elements that are needed to hold the converted string.
2563  The return value does not include room for a null terminator.
2564  Increment the return value by one if you want to have an element
2565  to use for a null terminator.
2566 */
2567 ON_DECL
2568 int ON_ConvertWideCharToUTF8(
2569  int bTestByteOrder,
2570  const wchar_t* sWideChar,
2571  int sWideChar_count,
2572  char* sUTF8,
2573  int sUTF8_count,
2574  unsigned int* error_status,
2575  unsigned int error_mask,
2576  ON__UINT32 error_code_point,
2577  const wchar_t** sNextWideChar
2578  );
2579 
2580 /*
2581 Description:
2582  Convert a wchar_t string using the native platform's most common
2583  encoding into a unicode string encoded as a UTF-16 ON__UINT16 array.
2584 
2585  If 1 = sizeof(wchar_t), then the wchar_t array is assumed to be
2586  a UTF-8 encoded string.
2587 
2588  If 2 = sizeof(wchar_t), then the wchar_t array is assumed to be
2589  a UTF-16 encoded string. This is the case with current versions
2590  of Microsoft Windows.
2591 
2592  If 4 = sizeof(wchar_t), then the wchar_t array is assumed to be
2593  a UTF-32 encoded string. This is the case with current versions
2594  of Apple OS X.
2595 
2596 Parameters:
2597  bTestByteOrder - [in]
2598  If bTestByteOrder is true and the input buffer begins with a
2599  byte order mark (BOM), then the BOM is skipped. If the value
2600  of the BOM is byte swapped, then subsequent input elements are
2601  byte swapped before being decoded. Specifically:
2602  - If the size of an input buffer element is 1 byte and the
2603  values of the first three input elements are a UTF-8 BOM
2604  (0xEF, 0xBB, 0xBF), then the first three input elements are
2605  ignored and decoding begins at the fourth input element.
2606  - If the size of an input buffer element is 2 bytes and the value
2607  of the first element is a UTF-16 BOM (0xFEFF), then the first
2608  element is ignored and decoding begins with the second element.
2609  - If the size of an input buffer element is 2 bytes and the value
2610  of the first element is a byte swapped UTF-16 BOM (0xFFFE),
2611  then the first element is ignored, decoding begins with the
2612  second element, and input element bytes are swapped before
2613  being decoded.
2614  - If the size of an input buffer element is 4 bytes and the value
2615  of the first element is a UTF-32 BOM (0x0000FEFF), then the
2616  first element is ignored and decoding begins with the second
2617  element.
2618  - If the size of an input buffer element is 4 bytes and the value
2619  of the first element is a byte swapped UTF-32 BOM (0xFFFE0000),
2620  then the first element is ignored, decoding begins with the
2621  second element, and input element bytes are swapped before
2622  being decoded.
2623  - In all other cases the first element of the input buffer is
2624  decoded and no byte swapping is performed.
2625 
2626  sWideChar - [in]
2627  wchar_t input string to convert.
2628 
2629  sWideChar_count - [in]
2630  If sWideChar_count >= 0, then it specifies the number of
2631  wchar_t elements in sWideChar[] to convert.
2632 
2633  If sWideChar_count == -1, then sWideChar must be a null terminated
2634  array and all the elements up to the first null element are
2635  converted.
2636 
2637  sUTF16 - [out]
2638  If sUTF16 is not null and sUTF16_count > 0, then the UTF-16
2639  encoded string is returned in this buffer. If there is room
2640  for the null terminator, the converted string will be null
2641  terminated. The null terminator is never included in the count
2642  returned by this function. The converted string is in the
2643  CPU's native byte order. No byte order mark is prepended.
2644 
2645  sUTF16_count - [in]
2646  If sUTF16_count > 0, then it specifies the number of available
2647  ON__UINT16 elements in the sUTF16[] buffer.
2648 
2649  If sUTF16_count == 0, then the sUTF16 parameter is ignored.
2650 
2651  error_status - [out]
2652  If error_status is not null, then bits of *error_status are
2653  set to indicate the success or failure of the conversion.
2654  When the error_mask parameter is used to mask some
2655  conversion errors, multiple bits may be set.
2656  0: Successful conversion with no errors.
2657  1: The input parameters were invalid.
2658  This error cannot be masked.
2659  2: The output buffer was not large enough to hold the converted
2660  string. As much conversion as possible is performed in this
2661  case and the error cannot be masked.
2662  4: When parsing a UTF-8 or UTF-32 string, the values of two
2663  consecutive encoding sequences formed a valid UTF-16
2664  surrogate pair.
2665  This error is masked if 0 != (4 & error_mask).
2666  If the error is masked, then the surrogate pair is
2667  decoded, the value of the resulting unicode code point
2668  is used, and parsing continues.
2669  8: An overlong UTF-8 encoding sequence was encountered and
2670  the value of the overlong UTF-8 sequence was a valid
2671  unicode code point.
2672  This error is masked if 0 != (8 & error_mask).
2673  If the error is masked, then the unicode code point
2674  is used and parsing continues.
2675  16: An illegal UTF-8, UTF-16 or UTF-32 encoding sequence occurred
2676  or an invalid unicode code point value resulted from decoding
2677  a UTF-8 sequence.
2678  This error is masked if 0 != (16 & error_mask).
2679  If the error is masked and the value of error_code_point is
2680  a valid unicode code point, then error_code_point is encoded
2681  in the output string and parsing continues.
2682 
2683  error_mask - [in]
2684  If 0 != (error_mask & 4), then type 4 errors are masked.
2685  If 0 != (error_mask & 8), then type 8 errors are masked.
2686  If 0 != (error_mask & 16) and error_code_point is a valid unicode
2687  code point value, then type 16 errors are masked.
2688 
2689  error_code_point - [in]
2690  Unicode code point value to use when masking type 16 errors.
2691  If 0 == (error_mask & 16), then this parameter is ignored.
2692  ON_UnicodeCodePoint::ON_ReplacementCharacter (U+FFFD)
2693  is a popular choice for the error_code_point value.
2694 
2695  sNextWideChar - [out]
2696  If sNextWideChar is not null, then *sNextWideChar points to the first
2697  element in the input sWideChar[] buffer that was not converted.
2698 
2699  If an error occurs and is not masked, then *sNextWideChar points to
2700  the element of sWideChar[] where the conversion failed. If no errors
2701  occur or all errors are masked, then *sNextWideChar points to
2702  sWideChar + sWideChar_count.
2703 Returns:
2704  If sUTF16_count > 0, the return value is the number of ON__UINT16
2705  elements written to sUTF16[]. When the return value < sUTF16_count,
2706  a null terminator is written to sUTF16[return value].
2707 
2708  If sUTF16_count == 0, the return value is the minimum number of
2709  ON__UINT16 elements that are needed to hold the converted string.
2710  The return value does not include room for a null terminator.
2711  Increment the return value by one if you want to have an element
2712  to use for a null terminator.
2713 */
2714 ON_DECL
2715 int ON_ConvertWideCharToUTF16(
2716  int bTestByteOrder,
2717  const wchar_t* sWideChar,
2718  int sWideChar_count,
2719  ON__UINT16* sUTF16,
2720  int sUTF16_count,
2721  unsigned int* error_status,
2722  unsigned int error_mask,
2723  ON__UINT32 error_code_point,
2724  const wchar_t** sNextWideChar
2725  );
2726 
2727 
2728 /*
2729 Description:
2730  Convert a wchar_t string using the native platform's most common
2731  encoding into a unicode string encoded as a UTF-32 ON__UINT32 array.
2732 
2733  If 1 = sizeof(wchar_t), then the wchar_t array is assumed to be
2734  a UTF-8 encoded string.
2735 
2736  If 2 = sizeof(wchar_t), then the wchar_t array is assumed to be
2737  a UTF-16 encoded string. This is the case with current versions
2738  of Microsoft Windows.
2739 
2740  If 4 = sizeof(wchar_t), then the wchar_t array is assumed to be
2741  a UTF-32 encoded string. This is the case with current versions
2742  of Apple OSX.
2743 
2744 Parameters:
2745  bTestByteOrder - [in]
2746  If bTestByteOrder is true and the input buffer begins with a
2747  byte order mark (BOM), then the BOM is skipped. If the value
2748  of the BOM is byte swapped, then subsequent input elements are
2749  byte swapped before being decoded. Specifically:
2750  - If the size of an input buffer element is 1 byte and the
2751  values of the first three input elements are a UTF-8 BOM
2752  (0xEF, 0xBB, 0xBF), then the first three input elements are
2753  ignored and decoding begins at the fourth input element.
2754  - If the size of an input buffer element is 2 bytes and the value
2755  of the first element is a UTF-16 BOM (0xFEFF), then the first
2756  element is ignored and decoding begins with the second element.
2757  - If the size of an input buffer element is 2 bytes and the value
2758  of the first element is a byte swapped UTF-16 BOM (0xFFFE),
2759  then the first element is ignored, decoding begins with the
2760  second element, and input element bytes are swapped before
2761  being decoded.
2762  - If the size of an input buffer element is 4 bytes and the value
2763  of the first element is a UTF-32 BOM (0x0000FEFF), then the
2764  first element is ignored and decoding begins with the second
2765  element.
2766  - If the size of an input buffer element is 4 bytes and the value
2767  of the first element is a byte swapped UTF-32 BOM (0xFFFE0000),
2768  then the first element is ignored, decoding begins with the
2769  second element, and input element bytes are swapped before
2770  being decoded.
2771  - In all other cases the first element of the input buffer is
2772  decoded and no byte swapping is performed.
2773 
2774  sWideChar - [in]
2775  wchar_t string to convert.
2776 
2777  sWideChar_count - [in]
2778  If sWideChar_count >= 0, then it specifies the number of
2779  wchar_t elements in sWideChar[] to convert.
2780 
2781  If sWideChar_count == -1, then sWideChar must be a null terminated
2782  string and all the elements up to the first null element are
2783  converted.
2784 
2785  sUTF32 - [out]
2786  If sUTF32 is not null and sUTF32_count > 0, then the UTF-32
2787  encoded string is returned in this buffer. If there is room
2788  for the null terminator, the converted string will be null
2789  terminated. The null terminator is never included in the count
2790  returned by this function. The converted string is in the
2791  CPU's native byte order. No byte order mark is prepended.
2792 
2793  sUTF32_count - [in]
2794  If sUTF32_count > 0, then it specifies the number of available
2795  ON__UINT32 elements in the sUTF32[] buffer.
2796 
2797  If sUTF32_count == 0, then the sUTF32 parameter is ignored.
2798 
2799  error_status - [out]
2800  If error_status is not null, then bits of *error_status are
2801  set to indicate the success or failure of the conversion.
2802  When the error_mask parameter is used to mask some
2803  conversion errors, multiple bits may be set.
2804  0: Successful conversion with no errors.
2805  1: The input parameters were invalid.
2806  This error cannot be masked.
2807  2: The output buffer was not large enough to hold the converted
2808  string. As much conversion as possible is performed in this
2809  case and the error cannot be masked.
2810  4: When parsing a UTF-8 or UTF-32 string, the values of two
2811  consecutive encoding sequences formed a valid UTF-16
2812  surrogate pair.
2813  This error is masked if 0 != (4 & error_mask).
2814  If the error is masked, then the surrogate pair is
2815  decoded, the value of the resulting unicode code point
2816  is used, and parsing continues.
2817  8: An overlong UTF-8 encoding sequence was encountered and
2818  the value of the overlong UTF-8 sequence was a valid
2819  unicode code point.
2820  This error is masked if 0 != (8 & error_mask).
2821  If the error is masked, then the unicode code point
2822  is used and parsing continues.
2823  16: An illegal UTF-8, UTF-16 or UTF-32 encoding sequence occurred
2824  or an invalid unicode code point value resulted from decoding
2825  a UTF-8 sequence.
2826  This error is masked if 0 != (16 & error_mask).
2827  If the error is masked and the value of error_code_point is
2828  a valid unicode code point, then error_code_point is encoded
2829  in the output string and parsing continues.
2830 
2831  error_mask - [in]
2832  If 0 != (error_mask & 4), then type 4 errors are masked.
2833  If 0 != (error_mask & 8), then type 8 errors are masked.
2834  If 0 != (error_mask & 16) and error_code_point is a valid unicode
2835  code point value, then type 16 errors are masked.
2836 
2837  error_code_point - [in]
2838  Unicode code point value to use when masking type 16 errors.
2839  If 0 == (error_mask & 16), then this parameter is ignored.
2840  ON_UnicodeCodePoint::ON_ReplacementCharacter (U+FFFD)
2841  is a popular choice for the error_code_point value.
2842 
2843  sNextWideChar - [out]
2844  If sNextWideChar is not null, then *sNextWideChar points to the first
2845  element in the input sWideChar[] buffer that was not converted.
2846 
2847  If an error occurs and is not masked, then *sNextWideChar points to
2848  the element of sWideChar[] where the conversion failed. If no errors
2849  occur or all errors are masked, then *sNextWideChar points to
2850  sWideChar + sWideChar_count.
2851 Returns:
2852  If sUTF32_count > 0, the return value is the number of ON__UINT32
2853  elements written to sUTF32[]. When the return value < sUTF32_count,
2854  a null terminator is written to sUTF32[return value].
2855 
2856  If sUTF32_count == 0, the return value is the minimum number of
2857  ON__UINT32 elements that are needed to hold the converted string.
2858  The return value does not include room for a null terminator.
2859  Increment the return value by one if you want to have an element
2860  to use for a null terminator.
2861 */
2862 ON_DECL
2863 int ON_ConvertWideCharToUTF32(
2864  int bTestByteOrder,
2865  const wchar_t* sWideChar,
2866  int sWideChar_count,
2867  ON__UINT32* sUTF32,
2868  int sUTF32_count,
2869  unsigned int* error_status,
2870  unsigned int error_mask,
2871  ON__UINT32 error_code_point,
2872  const wchar_t** sNextWideChar
2873  );
2874 
2875 
2876 /*
2877 Description:
2878  Convert a UTF-8 encoded char string to wchar_t string using
2879  the native platform's most common encoding.
2880 
2881  If 1 = sizeof(wchar_t), then UTF-8 encoding is used for the
2882  output string.
2883 
2884  If 2 = sizeof(wchar_t), then UTF-16 encoding is used for the
2885  output string. This is the case with current versions of
2886  Microsoft Windows.
2887 
2888  If 4 = sizeof(wchar_t), then UTF-32 encoding is used for the
2889  output string. This is the case with current versions of
2890  Apple OSX.
2891 
2892 Parameters:
2893  bTestByteOrder - [in]
2894  If bTestByteOrder is true and the the input buffer is a
2895  byte order mark (BOM), then the BOM is skipped. It the value
2896  of the BOM is byte swapped, then subsequent input elements are
2897  byte swapped before being decoded. Specifically:
2898  - If the size of an input buffer element is 1 byte and the
2899  values of the first three input elements are a UTF-8 BOM
2900  (0xEF, 0xBB, 0xBF), then the first three input elements are
2901  ignored and decoding begins at the forth input element.
2902  - If the size of an input buffer element is 2 bytes and the value
2903  of the first element is a UTF-16 BOM (0xFEFF), then the first
2904  element is ignored and decoding begins with the second element.
2905  - If the size of an input buffer element is 2 bytes and the value
2906  of the first element is a byte swapped UTF-16 BOM (0xFFFE),
2907  then the first element is ignored, decoding begins with the
2908  second element, and input element bytes are swapped before
2909  being decoded.
2910  - If the size of an input buffer element is 4 bytes and the value
2911  of the first element is a UTF-32 BOM (0x0000FEFF), then the
2912  first element is ignored and decoding begins with the second
2913  element.
2914  - If the size of an input buffer element is 4 bytes and the value
2915  of the first element is bytes swapped UTF-32 BOM (0xFFFE0000),
2916  then the first element is ignored, decoding begins with the
2917  second element, and input element bytes are swapped before
2918  being decoded.
2919  - In all other cases the first element of the input buffer is
2920  decoded and no byte swapping is performed.
2921 
2922  sUTF8 - [in]
2923  UTF-8 string to convert.
2924 
2925  sUTF8_count - [in]
2926  If sUTF8_count >= 0, then it specifies the number of
2927  char elements in sUTF8[] to convert.
2928 
2929  If sUTF8_count == -1, then sUTF8 must be a null terminated
2930  string and all the elements up to the first null element are
2931  converted.
2932 
2933  sWideChar - [out]
2934  If sWideChar is not null and sWideChar_count > 0, then the
2935  output string is returned in this buffer. If there is room
2936  for the null terminator, the converted string will be null
2937  terminated. The null terminator is never included in the count
2938  of returned by this function. The converted string is in the
2939  CPU's native byte order. No byte order mark is prepended.
2940 
2941  sWideChar_count - [in]
2942  If sWideChar_count > 0, then it specifies the number of available
2943  wchar_t elements in the sWideChar[] buffer.
2944 
2945  If sWideChar_count == 0, then the sWideChar parameter is ignored.
2946 
2947  error_status - [out]
2948  If error_status is not null, then bits of *error_status are
2949  set to indicate the success or failure of the conversion.
2950  When the error_mask parameter is used to used to mask some
2951  conversion errors, multiple bits may be set.
2952  0: Successful conversion with no errors.
2953  1: The input parameters were invalid.
2954  This error cannot be masked.
2955  2: The ouput buffer was not large enough to hold the converted
2956  string. As much conversion as possible is performed in this
2957  case and the error cannot be masked.
2958  4: When parsing a UTF-8 or UTF-32 string, the values of two
2959  consecutive encoding sequences formed a valid UTF-16
2960  surrogate pair.
2961  This error is masked if 0 != (4 & m_error_mask).
2962  If the error is masked, then the surrogate pair is
2963  decoded, the value of the resulting unicode code point
2964  is used, and parsing continues.
2965  8: An overlong UTF-8 encoding sequence was encountered and
2966  the value of the overlong sUTF-8 equence was a valid
2967  unicode code point.
2968  This error is masked if 0 != (8 & m_error_mask).
2969  If the error is masked, then the unicode code point
2970  is used and parsing continues.
2971  16: An illegal UTF-8, UTF-16 or UTF-32 encoding sequence occured
2972  or an invalid unicode code point value resulted from decoding
2973  a UTF-8 sequence.
2974  This error is masked if 0 != (16 & m_error_mask).
2975  If the error is masked and the value of error_code_point is
2976  a valid unicode code point, then error_code_point is encoded
2977  in the output string and parsing continues.
2978 
2979  error_mask - [in]
2980  If 0 != (error_mask & 4), then type 4 errors are masked.
2981  If 0 != (error_mask & 8), then type 8 errors are masked.
2982  If 0 != (error_mask & 16) and error_code_point is a valid unicode
2983  code point value, then type 16 errors are masked.
2984 
2985  error_code_point - [in]
2986  Unicode code point value to use in when masking type 16 errors.
2987  If 0 == (error_mask & 16), then this parameter is ignored.
2988  ON_UnicodeCodePoint::ON_ReplacementCharacter (U+FFFD)
2989  is a popular choice for the error_code_point value.
2990 
2991  sNextUTF8 - [out]
2992  If sNextUTF8 is not null, then *sNextUTF8 points to the first
2993  element in the input sUTF8[] buffer that was not converted.
2994 
2995  If an error occurs and is not masked, then *sNextUTF8 points to
2996  the element of sUTF8[] where the conversion failed. If no errors
2997  occur or all errors are masked, then *sNextUTF8 points to
2998  sUTF8 + sUTF8_count.
2999 
3000 Returns:
3001  If sWideChar_count > 0, the return value is the number of wchar_t
3002  elements written to sWideChar[]. When the return value < sWideChar_count,
3003  a null terminator is written to sWideChar[return value].
3004 
3005  If sWideChar_count == 0, the return value is the minimum number of
3006  wchar_t elements that are needed to hold the converted string.
3007  The return value does not include room for a null terminator.
3008  Increment the return value by one if you want to have an element
3009  to use for a null terminator.
3010 */
3011 ON_DECL
3012 int ON_ConvertUTF8ToWideChar(
3013  int bTestByteOrder, // [in] NOTE(review): presumably enables byte order mark handling as in the UTF-16/UTF-32 variants - confirm
3014  const char* sUTF8, // [in] UTF-8 input buffer; const, so not written through
3015  int sUTF8_count, // [in] element count of sUTF8[]; *sNextUTF8 ends at sUTF8 + sUTF8_count on full conversion
3016  wchar_t* sWideChar, // [out] buffer that receives the converted wchar_t elements
3017  int sWideChar_count, // [in] > 0: elements may be written to sWideChar[]; 0: only the required element count is returned
3018  unsigned int* error_status, // [out] receives the error bits (types 8 and 16 among others)
3019  unsigned int error_mask, // [in] bits 4, 8 and 16 select which of those error types are masked
3020  ON__UINT32 error_code_point, // [in] code point used when masking type 16 errors; ignored if 0 == (error_mask & 16)
3021  const char** sNextUTF8 // [out] optional (may be null); receives a pointer to the first unconverted element of sUTF8[]
3022  ); // returns the number of wchar_t elements written, or the number required when sWideChar_count == 0
3023 
3024 /*
3025 Description:
3026  Convert a UTF-16 encoded string to wchar_t string using
3027  the native platform's most common encoding.
3028 
3029  If 1 = sizeof(wchar_t), then UTF-8 encoding is used for the
3030  output string.
3031 
3032  If 2 = sizeof(wchar_t), then UTF-16 encoding is used for the
3033  output string. This is the case with current versions of
3034  Microsoft Windows.
3035 
3036  If 4 = sizeof(wchar_t), then UTF-32 encoding is used for the
3037  output string. This is the case with current versions of
3038  Apple OSX.
3039 
3040 Parameters:
3041  bTestByteOrder - [in]
3042  If bTestByteOrder is true and the input buffer begins with a
3043  byte order mark (BOM), then the BOM is skipped. If the value
3044  of the BOM is byte swapped, then subsequent input elements are
3045  byte swapped before being decoded. Specifically:
3046  - If the size of an input buffer element is 1 byte and the
3047  values of the first three input elements are a UTF-8 BOM
3048  (0xEF, 0xBB, 0xBF), then the first three input elements are
3049  ignored and decoding begins at the fourth input element.
3050  - If the size of an input buffer element is 2 bytes and the value
3051  of the first element is a UTF-16 BOM (0xFEFF), then the first
3052  element is ignored and decoding begins with the second element.
3053  - If the size of an input buffer element is 2 bytes and the value
3054  of the first element is a byte swapped UTF-16 BOM (0xFFFE),
3055  then the first element is ignored, decoding begins with the
3056  second element, and input element bytes are swapped before
3057  being decoded.
3058  - If the size of an input buffer element is 4 bytes and the value
3059  of the first element is a UTF-32 BOM (0x0000FEFF), then the
3060  first element is ignored and decoding begins with the second
3061  element.
3062  - If the size of an input buffer element is 4 bytes and the value
3063  of the first element is a byte swapped UTF-32 BOM (0xFFFE0000),
3064  then the first element is ignored, decoding begins with the
3065  second element, and input element bytes are swapped before
3066  being decoded.
3067  - In all other cases the first element of the input buffer is
3068  decoded and no byte swapping is performed.
3069 
3070  sUTF16 - [in]
3071  UTF-16 string to convert.
3072 
3073  sUTF16_count - [in]
3074  If sUTF16_count >= 0, then it specifies the number of
3075  ON__UINT16 elements in sUTF16[] to convert.
3076 
3077  If sUTF16_count == -1, then sUTF16 must be a null terminated
3078  string and all the elements up to the first null element are
3079  converted.
3080 
3081  sWideChar - [out]
3082  If sWideChar is not null and sWideChar_count > 0, then the
3083  output string is returned in this buffer. If there is room
3084  for the null terminator, the converted string will be null
3085  terminated. The null terminator is never included in the count
3086  returned by this function. The converted string is in the
3087  CPU's native byte order. No byte order mark is prepended.
3088 
3089  sWideChar_count - [in]
3090  If sWideChar_count > 0, then it specifies the number of available
3091  wchar_t elements in the sWideChar[] buffer.
3092 
3093  If sWideChar_count == 0, then the sWideChar parameter is ignored.
3094 
3095  error_status - [out]
3096  If error_status is not null, then bits of *error_status are
3097  set to indicate the success or failure of the conversion.
3098  When the error_mask parameter is used to mask some
3099  conversion errors, multiple bits may be set.
3100  0: Successful conversion with no errors.
3101  1: The input parameters were invalid.
3102  This error cannot be masked.
3103  2: The output buffer was not large enough to hold the converted
3104  string. As much conversion as possible is performed in this
3105  case and the error cannot be masked.
3106  4: When parsing a UTF-8 or UTF-32 string, the values of two
3107  consecutive encoding sequences formed a valid UTF-16
3108  surrogate pair.
3109  This error is masked if 0 != (4 & error_mask).
3110  If the error is masked, then the surrogate pair is
3111  decoded, the value of the resulting unicode code point
3112  is used, and parsing continues.
3113  8: An overlong UTF-8 encoding sequence was encountered and
3114  the value of the overlong UTF-8 sequence was a valid
3115  unicode code point.
3116  This error is masked if 0 != (8 & error_mask).
3117  If the error is masked, then the unicode code point
3118  is used and parsing continues.
3119  16: An illegal UTF-8, UTF-16 or UTF-32 encoding sequence occurred
3120  or an invalid unicode code point value resulted from decoding
3121  a UTF-8 sequence.
3122  This error is masked if 0 != (16 & error_mask).
3123  If the error is masked and the value of error_code_point is
3124  a valid unicode code point, then error_code_point is encoded
3125  in the output string and parsing continues.
3126 
3127  error_mask - [in]
3128  If 0 != (error_mask & 4), then type 4 errors are masked.
3129  If 0 != (error_mask & 8), then type 8 errors are masked.
3130  If 0 != (error_mask & 16) and error_code_point is a valid unicode
3131  code point value, then type 16 errors are masked.
3132 
3133  error_code_point - [in]
3134  Unicode code point value to use when masking type 16 errors.
3135  If 0 == (error_mask & 16), then this parameter is ignored.
3136  ON_UnicodeCodePoint::ON_ReplacementCharacter (U+FFFD)
3137  is a popular choice for the error_code_point value.
3138 
3139  sNextUTF16 - [out]
3140  If sNextUTF16 is not null, then *sNextUTF16 points to the first
3141  element in the input sUTF16[] buffer that was not converted.
3142 
3143  If an error occurs and is not masked, then *sNextUTF16 points to
3144  the element of sUTF16[] where the conversion failed. If no errors
3145  occur or all errors are masked, then *sNextUTF16 points to
3146  sUTF16 + sUTF16_count.
3147 
3148 Returns:
3149  If sWideChar_count > 0, the return value is the number of wchar_t
3150  elements written to sWideChar[]. When the return value < sWideChar_count,
3151  a null terminator is written to sWideChar[return value].
3152 
3153  If sWideChar_count == 0, the return value is the minimum number of
3154  wchar_t elements that are needed to hold the converted string.
3155  The return value does not include room for a null terminator.
3156  Increment the return value by one if you want to have an element
3157  to use for a null terminator.
3158 */
3159 ON_DECL
3160 int ON_ConvertUTF16ToWideChar(
3161  int bTestByteOrder, // [in] true: skip a leading byte order mark and byte swap the input if the BOM is byte swapped
3162  const ON__UINT16* sUTF16, // [in] UTF-16 string to convert
3163  int sUTF16_count, // [in] number of ON__UINT16 elements to convert, or -1 if sUTF16 is null terminated
3164  wchar_t* sWideChar, // [out] output buffer; null terminated when there is room for the terminator
3165  int sWideChar_count, // [in] number of wchar_t elements available in sWideChar[]; 0: sWideChar is ignored
3166  unsigned int* error_status, // [out] optional (may be null); receives error bits 1, 2, 4, 8, 16
3167  unsigned int error_mask, // [in] bits 4, 8 and 16 select which of those error types are masked
3168  ON__UINT32 error_code_point, // [in] code point encoded in place of masked type 16 errors; ignored if 0 == (error_mask & 16)
3169  const ON__UINT16** sNextUTF16 // [out] optional (may be null); receives a pointer to the first unconverted element of sUTF16[]
3170  ); // returns the number of wchar_t elements written, or the number required when sWideChar_count == 0
3171 
3172 /*
3173 Description:
3174  Convert a UTF-32 encoded string to wchar_t string using
3175  the native platform's most common encoding.
3176 
3177  If 1 = sizeof(wchar_t), then UTF-8 encoding is used for the
3178  output string.
3179 
3180  If 2 = sizeof(wchar_t), then UTF-16 encoding is used for the
3181  output string. This is the case with current versions of
3182  Microsoft Windows.
3183 
3184  If 4 = sizeof(wchar_t), then UTF-32 encoding is used for the
3185  output string. This is the case with current versions of
3186  Apple OSX.
3187 
3188 Parameters:
3189  bTestByteOrder - [in]
3190  If bTestByteOrder is true and the input buffer begins with a
3191  byte order mark (BOM), then the BOM is skipped. If the value
3192  of the BOM is byte swapped, then subsequent input elements are
3193  byte swapped before being decoded. Specifically:
3194  - If the size of an input buffer element is 1 byte and the
3195  values of the first three input elements are a UTF-8 BOM
3196  (0xEF, 0xBB, 0xBF), then the first three input elements are
3197  ignored and decoding begins at the fourth input element.
3198  - If the size of an input buffer element is 2 bytes and the value
3199  of the first element is a UTF-16 BOM (0xFEFF), then the first
3200  element is ignored and decoding begins with the second element.
3201  - If the size of an input buffer element is 2 bytes and the value
3202  of the first element is a byte swapped UTF-16 BOM (0xFFFE),
3203  then the first element is ignored, decoding begins with the
3204  second element, and input element bytes are swapped before
3205  being decoded.
3206  - If the size of an input buffer element is 4 bytes and the value
3207  of the first element is a UTF-32 BOM (0x0000FEFF), then the
3208  first element is ignored and decoding begins with the second
3209  element.
3210  - If the size of an input buffer element is 4 bytes and the value
3211  of the first element is a byte swapped UTF-32 BOM (0xFFFE0000),
3212  then the first element is ignored, decoding begins with the
3213  second element, and input element bytes are swapped before
3214  being decoded.
3215  - In all other cases the first element of the input buffer is
3216  decoded and no byte swapping is performed.
3217 
3218  sUTF32 - [in]
3219  UTF-32 string to convert.
3220 
3221  sUTF32_count - [in]
3222  If sUTF32_count >= 0, then it specifies the number of
3223  ON__UINT32 elements in sUTF32[] to convert.
3224 
3225  If sUTF32_count == -1, then sUTF32 must be a null terminated
3226  string and all the elements up to the first null element are
3227  converted.
3228 
3229  sWideChar - [out]
3230  If sWideChar is not null and sWideChar_count > 0, then the
3231  output string is returned in this buffer. If there is room
3232  for the null terminator, the converted string will be null
3233  terminated. The null terminator is never included in the count
3234  returned by this function. The converted string is in the
3235  CPU's native byte order. No byte order mark is prepended.
3236 
3237  sWideChar_count - [in]
3238  If sWideChar_count > 0, then it specifies the number of available
3239  wchar_t elements in the sWideChar[] buffer.
3240 
3241  If sWideChar_count == 0, then the sWideChar parameter is ignored.
3242 
3243  error_status - [out]
3244  If error_status is not null, then bits of *error_status are
3245  set to indicate the success or failure of the conversion.
3246  When the error_mask parameter is used to mask some
3247  conversion errors, multiple bits may be set.
3248  0: Successful conversion with no errors.
3249  1: The input parameters were invalid.
3250  This error cannot be masked.
3251  2: The output buffer was not large enough to hold the converted
3252  string. As much conversion as possible is performed in this
3253  case and the error cannot be masked.
3254  4: When parsing a UTF-8 or UTF-32 string, the values of two
3255  consecutive encoding sequences formed a valid UTF-16
3256  surrogate pair.
3257  This error is masked if 0 != (4 & error_mask).
3258  If the error is masked, then the surrogate pair is
3259  decoded, the value of the resulting unicode code point
3260  is used, and parsing continues.
3261  8: An overlong UTF-8 encoding sequence was encountered and
3262  the value of the overlong UTF-8 sequence was a valid
3263  unicode code point.
3264  This error is masked if 0 != (8 & error_mask).
3265  If the error is masked, then the unicode code point
3266  is used and parsing continues.
3267  16: An illegal UTF-8, UTF-16 or UTF-32 encoding sequence occurred
3268  or an invalid unicode code point value resulted from decoding
3269  a UTF-8 sequence.
3270  This error is masked if 0 != (16 & error_mask).
3271  If the error is masked and the value of error_code_point is
3272  a valid unicode code point, then error_code_point is encoded
3273  in the output string and parsing continues.
3274 
3275  error_mask - [in]
3276  If 0 != (error_mask & 4), then type 4 errors are masked.
3277  If 0 != (error_mask & 8), then type 8 errors are masked.
3278  If 0 != (error_mask & 16) and error_code_point is a valid unicode
3279  code point value, then type 16 errors are masked.
3280 
3281  error_code_point - [in]
3282  Unicode code point value to use when masking type 16 errors.
3283  If 0 == (error_mask & 16), then this parameter is ignored.
3284  ON_UnicodeCodePoint::ON_ReplacementCharacter (U+FFFD)
3285  is a popular choice for the error_code_point value.
3286 
3287  sNextUTF32 - [out]
3288  If sNextUTF32 is not null, then *sNextUTF32 points to the first
3289  element in the input sUTF32[] buffer that was not converted.
3290 
3291  If an error occurs and is not masked, then *sNextUTF32 points to
3292  the element of sUTF32[] where the conversion failed. If no errors
3293  occur or all errors are masked, then *sNextUTF32 points to
3294  sUTF32 + sUTF32_count.
3295 
3296 Returns:
3297  If sWideChar_count > 0, the return value is the number of wchar_t
3298  elements written to sWideChar[]. When the return value < sWideChar_count,
3299  a null terminator is written to sWideChar[return value].
3300 
3301  If sWideChar_count == 0, the return value is the minimum number of
3302  wchar_t elements that are needed to hold the converted string.
3303  The return value does not include room for a null terminator.
3304  Increment the return value by one if you want to have an element
3305  to use for a null terminator.
3306 See Also:
3307  ON_wString::FromUnicodeCodePoints()
3308 */
3309 ON_DECL
3310 int ON_ConvertUTF32ToWideChar(
3311  int bTestByteOrder, // [in] true: skip a leading byte order mark and byte swap the input if the BOM is byte swapped
3312  const ON__UINT32* sUTF32, // [in] UTF-32 string to convert
3313  int sUTF32_count, // [in] number of ON__UINT32 elements to convert, or -1 if sUTF32 is null terminated
3314  wchar_t* sWideChar, // [out] output buffer; null terminated when there is room for the terminator
3315  int sWideChar_count, // [in] number of wchar_t elements available in sWideChar[]; 0: sWideChar is ignored
3316  unsigned int* error_status, // [out] optional (may be null); receives error bits 1, 2, 4, 8, 16
3317  unsigned int error_mask, // [in] bits 4, 8 and 16 select which of those error types are masked
3318  ON__UINT32 error_code_point, // [in] code point encoded in place of masked type 16 errors; ignored if 0 == (error_mask & 16)
3319  const ON__UINT32** sNextUTF32 // [out] optional (may be null); receives a pointer to the first unconverted element of sUTF32[]
3320  ); // returns the number of wchar_t elements written, or the number required when sWideChar_count == 0
3321 
3322 /*
3323 Description:
3324  Convert a string from a Microsoft multibyte code page encoding
3325  to a wide string using the native platform's wchar_t encoding.
3326  This function is designed to be used to parse portions of
3327  rich text RTF in ON_TextContent and user interface code.
3328  OpenNURBS assumes all other char strings are UTF-8 encoded.
3329 
3330  If 1 = sizeof(wchar_t), then UTF-8 encoding is used for the
3331  output string.
3332 
3333  If 2 = sizeof(wchar_t), then UTF-16 encoding is used for the
3334  output string. This is the case with current versions of
3335  Microsoft Windows.
3336 
3337  If 4 = sizeof(wchar_t), then UTF-32 encoding is used for the
3338  output string. This is the case with current versions of
3339  Apple OSX.
3340 
3341 Parameters:
3342  windows_code_page - [in]
3343  The Windows code page specifies the encoding of the sMBCS string.
3344 
3345  sMBCS - [in]
3346  Windows multibyte string with encoding identified by windows_code_page.
3347 
3348  sMBCS_count - [in]
3349  If sMBCS_count >= 0, then it specifies the number of
3350  char elements in sMBCS[] to convert.
3351 
3352  If sMBCS_count == -1, then sMBCS must be a null terminated
3353  string and all the elements up to the first null element are
3354  converted.
3355 
3356  sWideChar - [out]
3357  If sWideChar is not null and sWideChar_count > 0, then the
3358  output string is returned in this buffer. If there is room
3359  for the null terminator, the converted string will be null
3360  terminated. The null terminator is never included in the count
3361  returned by this function. The converted string is in the
3362  CPU's native byte order. No byte order mark is prepended.
3363 
3364  sWideChar_capacity - [in]
3365  If sWideChar_capacity > 0, then it specifies the number of available
3366  wchar_t elements in the sWideChar[] buffer.
3367 
3368  If sWideChar_capacity == 0, then the sWideChar parameter is ignored.
3369 
3370  error_status - [out]
3371  If error_status is not null, then bits of *error_status are
3372  set to indicate the success or failure of the conversion.
3373  Multiple bits may be set when more than one of the following
3374  conditions occurs during the conversion.
3375  0: Successful conversion with no errors.
3376  1: The input parameters were invalid.
3377  This error cannot be masked.
3378  2: The output buffer was not large enough to hold the converted
3379  string. As much conversion as possible is performed in this
3380  case and the error cannot be masked.
3381  16: An illegal encoding sequence occurred.
3382  The illegal sequence is replaced with
3383  a single ON_wString::ReplacementCharacter in the output string
3384  and parsing continues.
3385 
3386 Returns:
3387  If sWideChar_capacity > 0, the return value is the number of wchar_t
3388  elements written to sWideChar[]. When the return value < sWideChar_capacity,
3389  a null terminator is written to sWideChar[return value].
3390 
3391  If sWideChar_capacity == 0, the return value is the minimum number of
3392  wchar_t elements that are needed to hold the converted string.
3393  The return value does not include room for a null terminator.
3394  Increment the return value by one if you want to have an element
3395  to use for a null terminator.
3396 */
3397 ON_DECL
3398 int ON_ConvertMSMBCPToWideChar(
3399  ON__UINT32 windows_code_page, // [in] Windows code page identifying the encoding of sMBCS
3400  const char* sMBCS, // [in] multibyte string encoded with windows_code_page
3401  int sMBCS_count, // [in] number of char elements to convert, or -1 if sMBCS is null terminated
3402  wchar_t* sWideChar, // [out] output buffer; null terminated when there is room for the terminator
3403  int sWideChar_capacity, // [in] number of wchar_t elements available in sWideChar[]
3404  unsigned int* error_status // [out] optional (may be null); receives error bits 1, 2, 16
3405  ); // returns the number of wchar_t elements written to sWideChar[]
3406 
3407 
3408 ON_END_EXTERNC
3409 
3410 #if defined(ON_CPLUSPLUS)
/*
Description:
  Test function declared only for C++ builds (inside the ON_CPLUSPLUS
  guard); the overloaded name and the ON_TextLog& parameter are not
  valid C.
  NOTE(review): this header does not document the test. "MSSBCP"
  presumably means Microsoft single byte code page - confirm against
  the implementation.
Parameters:
  code_page - [in]
    NOTE(review): presumably the code page to test - confirm.
  char_encoding - [in]
    NOTE(review): presumably a single encoded character value in
    code_page - confirm.
  bWindowsAPITest - [in]
    NOTE(review): presumably enables a comparison with the Windows
    API - confirm.
  text_log - [in/out]
    Passed by non-const reference.
    NOTE(review): presumably receives the test report - confirm.
Returns:
  An ON__UINT32 whose meaning is not documented here - TODO confirm.
*/
3411 ON_DECL
3412 ON__UINT32 ON_Test_MSSBCP(
3413  const ON__UINT32 code_page,
3414  const ON__UINT32 char_encoding,
3415  bool bWindowsAPITest,
3416  ON_TextLog& text_log
3417 );
3418 
/*
Description:
  Overload of ON_Test_MSSBCP() that takes a code page but no
  character encoding value. Declared only for C++ builds.
  NOTE(review): presumably tests every encoding in code_page -
  confirm against the implementation.
Parameters:
  code_page - [in]
  bWindowsAPITest - [in]
  text_log - [in/out]
    Passed by non-const reference.
Returns:
  bool; NOTE(review): presumably true when the test passes - confirm.
*/
3419 ON_DECL
3420 bool ON_Test_MSSBCP(
3421  const ON__UINT32 code_page,
3422  bool bWindowsAPITest,
3423  ON_TextLog& text_log
3424 );
3425 
/*
Description:
  Overload of ON_Test_MSSBCP() that takes neither a code page nor a
  character encoding value. Declared only for C++ builds.
  NOTE(review): presumably tests every supported code page - confirm
  against the implementation.
Parameters:
  bWindowsAPITest - [in]
  text_log - [in/out]
    Passed by non-const reference.
Returns:
  bool; NOTE(review): presumably true when the test passes - confirm.
*/
3426 ON_DECL
3427 bool ON_Test_MSSBCP(
3428  bool bWindowsAPITest,
3429  ON_TextLog& text_log
3430 );
3431 
/*
Description:
  Test/diagnostic function declared only for C++ builds.
  NOTE(review): not documented in this header. The name suggests it
  prints the platform's single byte code page to Unicode mapping -
  confirm against the implementation.
Parameters:
  code_page - [in]
  char_encoding0 - [in]
  char_encoding1 - [in]
    NOTE(review): char_encoding0 and char_encoding1 presumably bound
    the range of encodings printed - confirm.
  text_log - [in/out]
    Passed by non-const reference.
    NOTE(review): presumably receives the printed table - confirm.
Returns:
  bool; meaning not documented here - TODO confirm.
*/
3432 ON_DECL
3433 bool ON_Test_PrintPlatformMSSBCPToUnicodeTable(
3434  const ON__UINT32 code_page,
3435  ON__UINT32 char_encoding0,
3436  ON__UINT32 char_encoding1,
3437  ON_TextLog& text_log
3438 );
3439 
3440 #endif
3441 #endif
Definition: opennurbs_unicode.h:344
ON__UINT32 m_error_code_point
Definition: opennurbs_unicode.h:408
unsigned int m_error_mask
Definition: opennurbs_unicode.h:400
unsigned int m_error_status
Definition: opennurbs_unicode.h:392
Definition: opennurbs_textlog.h:20