opennurbs_locale.h
1 /* $NoKeywords: $ */
2 /*
3 //
4 // Copyright (c) 1993-2014 Robert McNeel & Associates. All rights reserved.
5 // OpenNURBS, Rhinoceros, and Rhino3D are registered trademarks of Robert
6 // McNeel & Associates.
7 //
8 // THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
9 // ALL IMPLIED WARRANTIES OF FITNESS FOR ANY PARTICULAR PURPOSE AND OF
10 // MERCHANTABILITY ARE HEREBY DISCLAIMED.
11 //
12 // For complete openNURBS copyright information see <http://www.opennurbs.org>.
13 //
14 ////////////////////////////////////////////////////////////////
15 */
16 
17 #if !defined(OPENNURBS_LOCALE_INC_)
18 #define OPENNURBS_LOCALE_INC_
19 
20 // ON_CRT_locale_t is the C runtime locale value type stored and returned by ON_Locale
21 // (NumericLocalePtr(), StringCollateAndMapLocalePtr()); its underlying type depends on the runtime.
22 #if defined(ON_RUNTIME_WIN)
23 typedef _locale_t ON_CRT_locale_t;
24 #elif defined(ON_RUNTIME_APPLE)
25 typedef locale_t ON_CRT_locale_t;
26 #elif defined(ON_RUNTIME_ANDROID)
27 typedef locale_t ON_CRT_locale_t;
28 #else
29 typedef ON__UINT_PTR ON_CRT_locale_t;
30 #endif
31 
32 class ON_CLASS ON_Locale
33 {
34 public:
35 
36  enum WindowsLCID : unsigned int
37  {
38  OrdinalLCID = 0, // not a real Windows LCID
39 
40  InvariantCultureLCID = 0x0027, // 39 decimal
41 
42  // Windows LCID for languages Rhino supports
43 
44  // "cs-CZ" Czech, ???? script implied
45  cs_CZ_LCID = 0x0405, //1029 decimal
46 
47  // "de-DE" German, Germany, Latn script implied
48  de_DE_LCID = 0x0407, // 1031 decimal
49 
50  // "en-US" English, US, Latn script implied
51  en_US_LCID = 0x0409, // 1033 decimal
52 
53  // "en-CA" English, Canada, Latn script implied
54  en_CA_LCID = 0x1009, // 4105 decimal
55 
56  // "es-ES_tradnl" Spanish, Spain, Latn script implied, traditional sort
57  es_ES_tradnl_LCID = 0x040A, // 1034 decimal
58 
59  // "es-ES" Spanish, Spain, Latn script implied, modern sort
60  es_ES_LCID = 0x0c0a, // 3082 decimal
61 
62  // "fr-FR" French, France, Latn script implied
63  fr_FR_LCID = 0x040c, // 1036 decimal
64 
65  // "it-IT" Italian, Italy, Latn script implied
66  it_IT_LCID = 0x0410, // 1040 decimal
67 
68  // "ja-JP" Japanese, Japan, ???? script implied
69  ja_JP_LCID = 0x0411, // 1041 decimal
70 
71  // Korean, Republic of Korea, ???? script implied
72  ko_KR_LCID = 0x0412, // 1042 decimal
73 
74  // Polish, Poland, ???? script implied
75  pl_PL_LCID = 0x0415, // 1045 decimal
76 
77  // Portuguese, Portugal, Latn script implied
78  pt_PT_LCID = 0x0816, // 2070 decimal
79 
80  // According to https://en.wikipedia.org/wiki/Chinese_language, Chinese is a family of language
81  // varieties, often mutually unintelligible. Specifying both Script and REGION
82  // (zh-Hans-CN or zh-Hant-TW) doesn't narrow things down nearly enough.
83  //
84  // Basically we have to hope the string collate and mapping functions supplied by the OS and
85  // the translations supplied by our staff work well for our customers who select from the
86  // two types of "Chinese" Rhino offers.
87  //
88 
89  // Standard Chinese (Mandarin), Peoples Republic of China, Hans script implied (simplified characters)
90  zh_CN_LCID = 0x0804, // 2052 decimal
91 
92  // Standard Chinese (Mandarin), Taiwan, Hant script implied (traditional characters)
93  zh_TW_LCID = 0x0404 // 1028 decimal
94  };
95 
96  // The ordinal locale.
97  // String compares use ordinal element values.
98  // The decimal point is a period.
99  static const ON_Locale Ordinal;
100 
101  // The invariant culture locale.
102  // The decimal point is a period.
104 
105 private:
106  static ON_Locale m_CurrentCulture;
107 
108 public:
109  // Reference to ON_Locale::m_CurrentCulture.
110  // The value is set by calling ON_Locale::SetCurrentCulture();
111  // The default is a copy of ON_Locale::Ordinal.
112  static const ON_Locale& CurrentCulture;
113 
114  /*
115  Description:
116  Set the current culture locale
117  Parameters:
118  current_culture_locale - [in]
119  */
120  static bool SetCurrentCulture(
121  const ON_Locale& current_culture_locale
122  );
123 
124 
125  // Default construction creates a copy of ON_Local::Ordinal
126  ON_Locale() ON_NOEXCEPT;
127 
128  ~ON_Locale() = default;
129  ON_Locale(const ON_Locale&) = default;
130  ON_Locale& operator=(const ON_Locale&) = default;
131 
132  // Maximum buffer capacity for any ON_Locale functions
133  // that return string information in a buffer.
134  enum
135  {
136  BUFFER_MAXIMUM_CAPACITY = 128
137  };
138 
139  /*
140  Description:
141  Get the language id.
142 
143  Parameters:
144  buffer - [out]
145  A null terminated string containing the language id is returned in this buffer.
146  The string has the form:
147 
148  <language>[-<Script>][-<REGION>]
149 
150  <language>
151  ISO 639 language code.
152  http://www.iso.org/iso/language_codes
153 
154  <Script> is optional.
155  If present, it is a 4 alpha letter ISO 15924 script code
156  http://www.unicode.org/iso15924/iso15924-codes.html
157 
158  <REGION>
159  ISO 3166-1 country/region identifier. (2 alpha letters)
160  or UN M.49 code (3 digits)
161  http://www.iso.org/iso/home/standards/country_codes.htm
162 
163  buffer_capacity - [in]
164  number of elements in the buffer.
165  A capacity >= ON_Locale::BUFFER_MAXIMUM_CAPACITY will be large enough to
166  hold all possible output.
167 
168  Returns:
169  If buffer_capacity is to small or buffer is nullptr, then nullptr is returned.
170  Otherwise the pointer to buffer is returned.
171 
172  Remarks:
173  The Invariant language name is the empty string "".
174  */
175  const char* GetBCP47LanguageTag(
176  char* buffer,
177  size_t buffer_capacity
178  ) const;
179 
180  const wchar_t* GetBCP47LanguageTag(
181  wchar_t* buffer,
182  size_t buffer_capacity
183  ) const;
184 
185  /*
186  Parameters:
187  A string of the form
188 
189  <language>[-<Script>][-<REGION>]
190 
191  <language>
192  ISO 639 language code.
193  http://www.iso.org/iso/language_codes
194 
195  <Script> is optional.
196  If present, it is a 4 alpha letter ISO 15924 script code
197  http://www.unicode.org/iso15924/iso15924-codes.html
198 
199  <REGION>
200  ISO 3166-1 country/region identifier. (2 alpha letters)
201  or UN M.49 code (3 digits)
202  http://www.iso.org/iso/home/standards/country_codes.htm
203 
204  Remarks:
205  ON_Locale::InvariantCulture.BCP47LanguageName() = "";
206  ON_Locale::Oridnal.BCP47LanguageName() = "";
207  */
208  const char* BCP47LanguageTag() const;
209 
210 
211  /*
212  Returns:
213  ISO 639 language code.
214  When avilable, two letter codes from ISO 639-1 are prefered.
215  Remarks:
216  The InvariantCulture.LanguageCode() is "".
217  See Also:
218  http://www.iso.org/iso/language_codes
219  */
220  const char* LanguageCode() const;
221 
222  /*
223  Returns:
224  ISO 3166-1 country/region identifier (2 alpha) or UN M.49 code (3 digits)
225  Remarks:
226  The returned string can be "" if the no region is specified.
227  The InvariantCulture.RegionCode() is "".
228  See Also:
229  http://www.iso.org/iso/home/standards/country_codes.htm
230  */
231  const char* RegionCode() const;
232 
233  /*
234  Returns:
235  A 4 letter ISO 15924 script code
236  Remarks:
237  The returned string can be "" if the no script is specified for the locale.
238  The InvariantCulture.ScriptCode() is "".
239  See Also:
240  http://www.unicode.org/iso15924/iso15924-codes.html
241  */
242  const char* ScriptCode() const;
243 
244 
245  /*
246  Returns:
247  Microsoft Windows LCID value
248  ON_LocaleLCID::OrdinalLCID (=0)
249  The locale is a copy of ON_Locale::Ordinal.
250  ON_Locale::InvariantCultureLCID (=0x00000027U)
251  The locale is a copy of ON_Locale::InvariantCulture.
252  */
253  ON__UINT32 WindowsLCID() const;
254 
255  /*
256  Description:
257  Get the Microsoft Windows locale id.
258 
259  Parameters:
260  buffer - [out]
261  A null terminated string containing the Microsoft Windows locale id is returned in this buffer.
262  The string has the form:
263 
264  <language>[-<Script>][-<REGION>][_<sort_order>] (UTF-8 string encoding)
265 
266  <language>
267  ISO 639 language code.
268  http://www.iso.org/iso/language_codes
269 
270  <Script> is optional.
271  If present, it is a 4 alpha letter ISO 15924 script code
272  http://www.unicode.org/iso15924/iso15924-codes.html
273 
274  <REGION>
275  ISO 3166-1 country/region identifier. (2 alpha letters)
276  or UN M.49 code (3 digits)
277  http://www.iso.org/iso/home/standards/country_codes.htm
278 
279  <sort_order>
280  Up to six letters specifying a sort order.
281  Microsoft Windows codes are used.
282 
283  buffer_capacity - [in]
284  number of elements in the buffer.
285  A capacity >= ON_Locale::BUFFER_MAXIMUM_CAPACITY will be large enough to
286  hold all possible output.
287 
288  Returns:
289  If buffer_capacity is to small or buffer is nullptr, then nullptr is returned.
290  Otherwise the pointer to buffer is returned.
291 
292  Remarks:
293  The Invariant locale name is the empty string "".
294  */
295  const char* GetWindowsLocaleName(
296  char* buffer,
297  size_t buffer_capacity
298  ) const;
299 
300  const wchar_t* GetWindowsLocaleName(
301  wchar_t* buffer,
302  size_t buffer_capacity
303  ) const;
304 
305  /*
306  Returns:
307  Apple OS X / iOS locale name in the form
308  <language>[-<Script>][_<REGION>]
309 
310  <language>
311  ISO 639 language code.
312  When avilable, two letter codes from ISO 639-1 are prefered.
313  http://www.iso.org/iso/language_codes
314 
315  <Script> is optional.
316  If present, it is a 4 alpha letter ISO 15924 script code
317  http://www.unicode.org/iso15924/iso15924-codes.html
318 
319  <REGION>
320  ISO 3166-1 country/region identifier. (2 alpha letters)
321  or UN M.49 code (3 digits)
322  http://www.iso.org/iso/home/standards/country_codes.htm
323 
324  Remarks:
325  The Invariant locale name is the empty string "".
326  Apple language names have a hyphen (-) before the region.
327  Apple locale names have an underbar (_) before the region.
328  */
329  const char* GetAppleLocaleName(
330  char* buffer,
331  size_t buffer_capacity
332  ) const;
333 
334  const wchar_t* GetAppleLocaleName(
335  wchar_t* buffer,
336  size_t buffer_capacity
337  ) const;
338 
339  /*
340  Returns:
341  Apple OS X / iOS locale name in the form
342  <language>[-<Script>][-<REGION>]
343 
344  <language>
345  ISO 639 language code.
346  When avilable, two letter codes from ISO 639-1 are prefered.
347  http://www.iso.org/iso/language_codes
348 
349  <Script> is optional.
350  If present, it is a 4 alpha letter ISO 15924 script code
351  http://www.unicode.org/iso15924/iso15924-codes.html
352 
353  <REGION>
354  ISO 3166-1 country/region identifier. (2 alpha letters)
355  or UN M.49 code (3 digits)
356  http://www.iso.org/iso/home/standards/country_codes.htm
357 
358  Remarks:
359  The Invariant locale name is the empty string "".
360  Apple language names have a hyphen (-) before the region.
361  Apple locale names have an underbar (_) before the region.
362  */
363  const char* GetAppleLanguageName(
364  char* buffer,
365  size_t buffer_capacity
366  ) const;
367 
368  const wchar_t* GetAppleLanguageName(
369  wchar_t* buffer,
370  size_t buffer_capacity
371  ) const;
372 
373  /*
374  Returns:
375  A 6 letter locale sort order.
376  Remarks:
377  The returned string can be "" if the no sort order is specified for the locale.
378  The InvariantCulture.WindowsSortOrder() is "".
379  See Also:
380  https://msdn.microsoft.com/en-us/library/windows/desktop/dd374060(v=vs.85).aspx
381  */
382  const char* WindowsSortOrder() const;
383 
384  /*
385  Returns:
386  True if the C runtime formatted printing and scanning functions
387  are using the period character as the decimal point for
388  doubles and floats.
389  */
390  static bool PeriodIsCRuntimeDecimalPoint();
391 
392  /*
393  Description:
394  Use a call like setlocale(LC_NUMERIC,"C") to configure the
395  C runtime formatted printing and scanning functions to use the
396  period character as the decimal point for doubles and floats.
397  Returns:
398  True if successful.
399  */
400  static bool SetPeriodAsCRuntimeDecimalPoint();
401 
402 
403  /*
404  Description:
405  Use a call like setlocale(LC_NUMERIC,"C") to configure the
406  C runtime formatted printing and scanning functions to use the
407  period character as the decimal point for doubles and floats.
408  Returns:
409  0: failed
410  1: success
411  Currently The decimal piont is a period in the C-runtime
412  formatted printing and scanning functions.
413  2: success
414  When called, the decimal piont was not a period, but
415  a call to ON_Locale::SetPeriodAsCRuntimeDecimalPoint()
416  restored the defaut behavior.
417  */
418  static unsigned int EnforcePeriodAsCRuntimeDecimalPoint();
419 
420 
421  /*
422  Returns:
423  True if this is ON_Locale:InvariantCulture or a copy.
424  */
425  bool IsInvariantCulture() const;
426 
427  /*
428  Returns:
429  True if this is ON_Locale:Ordinal or a copy.
430  */
431  bool IsOrdinal() const;
432 
433  /*
434  Returns:
435  True if this is ON_Locale:Ordinal, ON_Locale:InvariantCulture or a copy
436  of one of them.
437  */
438  bool IsOrdinalOrInvariantCulture() const;
439 
440  /*
441  Description:
442  NumericLocalePtr() is an expert user function needed
443  to call C-runtime functions that format or parse numbers.
444  This locale must never be used to collate or map strings.
445 
446  The primary use for this function is in opennurbs implementations
447  of ON_String and ON_wString number formatting and parsing functions.
448  Example:
449 
450  // Call _sprintf_p_l
451  ON_CRT_locale_t numeric_locale = ON_Locale::CurrentCulture::NumericLocalePtr();
452  _sprintf_p_l(....,locale,...);
453 
454  Returns:
455  A value that can be passed into C-runtime functions that take
456  a locale parameter.
457  */
458  ON_CRT_locale_t NumericLocalePtr() const;
459 
460 
461  /*
462  Description:
463  StringCollateAndMapLocalePtr() is an expert user function needed
464  to call C-runtime functions that collate (compare)
465  and map (toupper/tolower) strings. This locale must never be used
466  for formatting or parsing numbers.
467 
468  The primary use for this function is in opennurbs implementations
469  of ON_String and ON_wString collate and map functions.
470  Example:
471 
472  // Call _wcsicoll_l
473  ON_CRT_locale_t coll_locale = ON_Locale::CurrentCulture::StringCollateAndMapLocalePtr();
474  _wcsicoll_l(....,coll_locale);
475 
476  Returns:
477  A value that can be passed into C-runtime functions that take
478  a locale parameter.
479  */
480  ON_CRT_locale_t StringCollateAndMapLocalePtr() const;
481 
482  /*
483  Description:
484  Create a locale from a Windows locale id.
485 
486  Parameters:
487  lcid - [in]
488  Windows LCID value or zero for the "ordinal" locale.
489 
490  Returns:
491  ON_Locale identified by lcid.
492  If lcid is not valid or not supported, a copy of ON_Locale::Ordinal is returned.
493  */
494  static ON_Locale FromWindowsLCID(
495  ON__UINT32 windows_lcid
496  );
497 
498  /*
499  Description:
500  Create a locale from a BCP 47 language name.
501 
502  Parameters:
503  language_name - [in]
504  The language name has the form
505  <language>[-<Script>][-<REGION>]
506  Case is not important.
507 
508  Returns:
509  ON_Locale identified by language_name.
510  If locale_name is not valid or not supported, a copy of ON_Locale::Ordinal is returned.
511  */
512  static ON_Locale FromBCP47LanguageName(
513  const char* language_name
514  );
515 
516  static ON_Locale FromBCP47LanguageName(
517  const wchar_t* language_name
518  );
519 
520  /*
521  Description:
522  Create a locale from a Windows locale name.
523 
524  Parameters:
525  windows_name - [in]
526  The Windows name has the form
527  <language>[-<Script>][-<REGION>][_<sort_order>]
528  Case is not important.
529 
530  Returns:
531  ON_Locale identified by locale_name.
532  If locale_name is not valid or not supported, a copy of ON_Locale::Ordinal is returned.
533  */
534  static ON_Locale FromWindowsName(
535  const char* windows_name
536  );
537 
538  static ON_Locale FromWindowsName(
539  const wchar_t* windows_name
540  );
541 
542  /*
543  Description:
544  Create a locale from an Apple locale or language name
545  Parameters:
546  apple_name - [in]
547  The Apple name has the form <language>[-<Script>][-<REGION>].
548  An underbar (_) may be used in place of a hyphen (-).
549  Case is not important.
550  Returns:
551  ON_Locale identified by locale_name.
552  If locale_name is not valid or not supported, a copy of ON_Locale::Ordinal is returned.
553 
554  */
555  static ON_Locale FromAppleName(
556  const char* apple_name
557  );
558 
559  static ON_Locale FromAppleName(
560  const wchar_t* apple_name
561  );
562 
563  /*
564  Description:
565  Create a locale from BCP 47 lanugage code, script code and region code.
566 
567  Parameters:
568  language_code - [in]
569  ISO 639 language code.
570  When avilable, two letter codes from ISO 639-1 are prefered.
571  http://www.iso.org/iso/language_codes
572 
573  script - [in]
574  nullptr, empty string, or a 4 letter ISO 15924 script code
575  http://www.unicode.org/iso15924/iso15924-codes.html
576 
577  <REGION>
578  nullptr, empty string, or an ISO 3166 country/region identifier.
579  http://www.iso.org/iso/home/standards/country_codes.htm
580 
581  Returns:
582  ON_Locale identified by the locale name.
583  If the locale name is not valid or not supported, a copy of ON_Locale::Ordinal is returned.
584  */
585  static ON_Locale FromSubtags(
586  const char* language_code,
587  const char* script_code,
588  const char* region_code
589  );
590 
591  static ON_Locale FromSubtags(
592  const wchar_t* language_code,
593  const wchar_t* script_code,
594  const wchar_t* region_code
595  );
596 
597  /*
598  Description:
599  Attempt to parse a string that is a language name or locale name
600  and extract language code, extlang code script code, region code
601  and Windows sort order.
602 
603  The language name has the form <language>[<-extlang>][-<Script>][-<REGION>]
604 
605  If the Microsoft [_<windows_sort_order>] appears after the language name,
606  it is parsed.
607 
608  Apple "locale ids" of the form <language>_<REGION>" are parsed as well
609  (an underbar separator instead of a hyphen before <REGION>).
610 
611  Parameters:
612  locale_name - [in]
613  name to parse. Case is ignored.
614  locale_name_element_count - [in]
615  number of elements to parse in locale_name[]
616  If locale_name_element_count < 0, then a null terminator ends parsing.
617 
618  language_code - [out]
619  language_code_capacity - [in]
620  number of elements available in language_code[].
621 
622  extlang_code - [out]
623  extlang_code_capacity - [in]
624  number of elements available in extlang_code[].
625 
626  script_code - [out]
627  script_code_capacity - [in]
628  number of elements available in script_code[].
629 
630  region_code - [out]
631  region_code_capacity - [in]
632  number of elements available in region_code[].
633 
634  windows_sortorder - [out]
635  windows_sortorder_capacity - [in]
636  number of elements available in windows_sortorder[].
637 
638  Remarks:
639  The standards for language identifiers (RFC 5646 and BCP 47) states that a hyphen
640  ( Unicode U+002D ) is supposed to be the separator between subtags.
641 
642  ftp://ftp.isi.edu/in-notes/bcp/bcp47.txt
643  */
644  static bool ParseName(
645  const wchar_t* locale_name,
646  int locale_name_element_count,
647  wchar_t* language_code,
648  size_t language_code_capacity,
649  wchar_t* extlang_code,
650  size_t extlang_code_capacity,
651  wchar_t* script_code,
652  size_t script_code_capacity,
653  wchar_t* region_code,
654  size_t region_code_capacity,
655  wchar_t* windows_sortorder,
656  size_t windows_sortorder_capacity
657  );
658 
659  static bool ParseName(
660  const char* locale_name,
661  int locale_name_element_count,
662  char* language_code,
663  size_t language_code_capacity,
664  char* extlang_code,
665  size_t extlang_code_capacity,
666  char* script_code,
667  size_t script_code_capacity,
668  char* region_code,
669  size_t region_code_capacity,
670  char* windows_sortorder,
671  size_t windows_sortorder_capacity
672  );
673 
674 private:
675  ON_CRT_locale_t m_numeric_locale = 0; // pointer to a C runtime locale type
676  ON_CRT_locale_t m_string_coll_map_locale = 0; // pointer to a C runtime locale type
677 
678  char m_bcp47_language_tag[85]; // <language>-<Script>-<REGION>
679 
680  // RFC 4646 language identifier
681  char m_language_subtag[9]; // ISO 639 code (RFC 4646 reserves 8 alpha elements)
682  char m_script_subtag[5]; // ISO 15924 code
683  char m_region_subtag[5]; // ISO 3166 code (2 alpha) or UN M.49 code (3 digit)
684 
685  char m_windows_sortorder[7]; // Windows sort order
686  char m_reserved2[21];
687 
688  // Values needed to use Windows tools
689  ON__UINT32 m_windows_lcid = 0; // Microsoft Windows LCID values (0 = ordinal, 0x0027 = invariant culture)
690  ON__UINT32 m_reserved3 = 0;
691 
692 private:
693 
694  // Construct from lcid and matching name
695  static ON_Locale FromWindowsLCIDAndName(
696  ON__UINT32 windows_lcid,
697  const char* name
698  );
699 
700  // Construct from perfect input
701  //ON_Locale(
702  // ON__UINT_PTR string_coll_map_locale_ptr,
703  // ON__UINT32 windows_lcid,
704  // const char* language_name,
705  // const char* language_code,
706  // const char* script_code,
707  // const char* region_code,
708  // const char* windows_sortorder
709  // );
710 
711 };
712 
713 #endif
static const ON_Locale InvariantCulture
Definition: opennurbs_locale.h:103
static const ON_Locale Ordinal
Definition: opennurbs_locale.h:99
WindowsLCID
Definition: opennurbs_locale.h:36
static const ON_Locale & CurrentCulture
Definition: opennurbs_locale.h:112
Definition: opennurbs_locale.h:32