encoding.go 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. // Copyright 2022 The TCell Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package tcell
  15. import (
  16. "strings"
  17. "sync"
  18. "golang.org/x/text/encoding"
  19. gencoding "github.com/gdamore/encoding"
  20. )
  21. var encodings map[string]encoding.Encoding
  22. var encodingLk sync.Mutex
  23. var encodingFallback EncodingFallback = EncodingFallbackFail
  24. // RegisterEncoding may be called by the application to register an encoding.
  25. // The presence of additional encodings will facilitate application usage with
  26. // terminal environments where the I/O subsystem does not support Unicode.
  27. //
  28. // Windows systems use Unicode natively, and do not need any of the encoding
  29. // subsystem when using Windows Console screens.
  30. //
  31. // Please see the Go documentation for golang.org/x/text/encoding -- most of
  32. // the common ones exist already as stock variables. For example, ISO8859-15
  33. // can be registered using the following code:
  34. //
  35. // import "golang.org/x/text/encoding/charmap"
  36. //
  37. // ...
  38. // RegisterEncoding("ISO8859-15", charmap.ISO8859_15)
  39. //
  40. // Aliases can be registered as well, for example "8859-15" could be an alias
  41. // for "ISO8859-15".
  42. //
  43. // For POSIX systems, this package will check the environment variables
  44. // LC_ALL, LC_CTYPE, and LANG (in that order) to determine the character set.
  45. // These are expected to have the following pattern:
  46. //
  47. // $language[.$codeset[@$variant]
  48. //
  49. // We extract only the $codeset part, which will usually be something like
  50. // UTF-8 or ISO8859-15 or KOI8-R. Note that if the locale is either "POSIX"
  51. // or "C", then we assume US-ASCII (the POSIX 'portable character set'
  52. // and assume all other characters are somehow invalid.)
  53. //
  54. // Modern POSIX systems and terminal emulators may use UTF-8, and for those
  55. // systems, this API is also unnecessary. For example, Darwin (MacOS X) and
  56. // modern Linux running modern xterm generally will out of the box without
  57. // any of this. Use of UTF-8 is recommended when possible, as it saves
  58. // quite a lot processing overhead.
  59. //
  60. // Note that some encodings are quite large (for example GB18030 which is a
  61. // superset of Unicode) and so the application size can be expected to
  62. // increase quite a bit as each encoding is added.
  63. // The East Asian encodings have been seen to add 100-200K per encoding to the
  64. // size of the resulting binary.
  65. //
  66. func RegisterEncoding(charset string, enc encoding.Encoding) {
  67. encodingLk.Lock()
  68. charset = strings.ToLower(charset)
  69. encodings[charset] = enc
  70. encodingLk.Unlock()
  71. }
  72. // EncodingFallback describes how the system behaves when the locale
  73. // requires a character set that we do not support. The system always
  74. // supports UTF-8 and US-ASCII. On Windows consoles, UTF-16LE is also
  75. // supported automatically. Other character sets must be added using the
  76. // RegisterEncoding API. (A large group of nearly all of them can be
  77. // added using the RegisterAll function in the encoding sub package.)
  78. type EncodingFallback int
  79. const (
  80. // EncodingFallbackFail behavior causes GetEncoding to fail
  81. // when it cannot find an encoding.
  82. EncodingFallbackFail = iota
  83. // EncodingFallbackASCII behavior causes GetEncoding to fall back
  84. // to a 7-bit ASCII encoding, if no other encoding can be found.
  85. EncodingFallbackASCII
  86. // EncodingFallbackUTF8 behavior causes GetEncoding to assume
  87. // UTF8 can pass unmodified upon failure. Note that this behavior
  88. // is not recommended, unless you are sure your terminal can cope
  89. // with real UTF8 sequences.
  90. EncodingFallbackUTF8
  91. )
  92. // SetEncodingFallback changes the behavior of GetEncoding when a suitable
  93. // encoding is not found. The default is EncodingFallbackFail, which
  94. // causes GetEncoding to simply return nil.
  95. func SetEncodingFallback(fb EncodingFallback) {
  96. encodingLk.Lock()
  97. encodingFallback = fb
  98. encodingLk.Unlock()
  99. }
  100. // GetEncoding is used by Screen implementors who want to locate an encoding
  101. // for the given character set name. Note that this will return nil for
  102. // either the Unicode (UTF-8) or ASCII encodings, since we don't use
  103. // encodings for them but instead have our own native methods.
  104. func GetEncoding(charset string) encoding.Encoding {
  105. charset = strings.ToLower(charset)
  106. encodingLk.Lock()
  107. defer encodingLk.Unlock()
  108. if enc, ok := encodings[charset]; ok {
  109. return enc
  110. }
  111. switch encodingFallback {
  112. case EncodingFallbackASCII:
  113. return gencoding.ASCII
  114. case EncodingFallbackUTF8:
  115. return encoding.Nop
  116. }
  117. return nil
  118. }
  119. func init() {
  120. // We always support UTF-8 and ASCII.
  121. encodings = make(map[string]encoding.Encoding)
  122. encodings["utf-8"] = gencoding.UTF8
  123. encodings["utf8"] = gencoding.UTF8
  124. encodings["us-ascii"] = gencoding.ASCII
  125. encodings["ascii"] = gencoding.ASCII
  126. encodings["iso646"] = gencoding.ASCII
  127. }