| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141 |
- // Copyright 2022 The TCell Authors
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use file except in compliance with the License.
- // You may obtain a copy of the license at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- package tcell
- import (
- "strings"
- "sync"
- "golang.org/x/text/encoding"
- gencoding "github.com/gdamore/encoding"
- )
- var encodings map[string]encoding.Encoding
- var encodingLk sync.Mutex
- var encodingFallback EncodingFallback = EncodingFallbackFail
- // RegisterEncoding may be called by the application to register an encoding.
- // The presence of additional encodings will facilitate application usage with
- // terminal environments where the I/O subsystem does not support Unicode.
- //
- // Windows systems use Unicode natively, and do not need any of the encoding
- // subsystem when using Windows Console screens.
- //
- // Please see the Go documentation for golang.org/x/text/encoding -- most of
- // the common ones exist already as stock variables. For example, ISO8859-15
- // can be registered using the following code:
- //
- // import "golang.org/x/text/encoding/charmap"
- //
- // ...
- // RegisterEncoding("ISO8859-15", charmap.ISO8859_15)
- //
- // Aliases can be registered as well, for example "8859-15" could be an alias
- // for "ISO8859-15".
- //
- // For POSIX systems, this package will check the environment variables
- // LC_ALL, LC_CTYPE, and LANG (in that order) to determine the character set.
- // These are expected to have the following pattern:
- //
- // $language[.$codeset[@$variant]
- //
- // We extract only the $codeset part, which will usually be something like
- // UTF-8 or ISO8859-15 or KOI8-R. Note that if the locale is either "POSIX"
- // or "C", then we assume US-ASCII (the POSIX 'portable character set'
- // and assume all other characters are somehow invalid.)
- //
- // Modern POSIX systems and terminal emulators may use UTF-8, and for those
- // systems, this API is also unnecessary. For example, Darwin (MacOS X) and
- // modern Linux running modern xterm generally will out of the box without
- // any of this. Use of UTF-8 is recommended when possible, as it saves
- // quite a lot processing overhead.
- //
- // Note that some encodings are quite large (for example GB18030 which is a
- // superset of Unicode) and so the application size can be expected to
- // increase quite a bit as each encoding is added.
- // The East Asian encodings have been seen to add 100-200K per encoding to the
- // size of the resulting binary.
- //
- func RegisterEncoding(charset string, enc encoding.Encoding) {
- encodingLk.Lock()
- charset = strings.ToLower(charset)
- encodings[charset] = enc
- encodingLk.Unlock()
- }
- // EncodingFallback describes how the system behaves when the locale
- // requires a character set that we do not support. The system always
- // supports UTF-8 and US-ASCII. On Windows consoles, UTF-16LE is also
- // supported automatically. Other character sets must be added using the
- // RegisterEncoding API. (A large group of nearly all of them can be
- // added using the RegisterAll function in the encoding sub package.)
- type EncodingFallback int
- const (
- // EncodingFallbackFail behavior causes GetEncoding to fail
- // when it cannot find an encoding.
- EncodingFallbackFail = iota
- // EncodingFallbackASCII behavior causes GetEncoding to fall back
- // to a 7-bit ASCII encoding, if no other encoding can be found.
- EncodingFallbackASCII
- // EncodingFallbackUTF8 behavior causes GetEncoding to assume
- // UTF8 can pass unmodified upon failure. Note that this behavior
- // is not recommended, unless you are sure your terminal can cope
- // with real UTF8 sequences.
- EncodingFallbackUTF8
- )
- // SetEncodingFallback changes the behavior of GetEncoding when a suitable
- // encoding is not found. The default is EncodingFallbackFail, which
- // causes GetEncoding to simply return nil.
- func SetEncodingFallback(fb EncodingFallback) {
- encodingLk.Lock()
- encodingFallback = fb
- encodingLk.Unlock()
- }
- // GetEncoding is used by Screen implementors who want to locate an encoding
- // for the given character set name. Note that this will return nil for
- // either the Unicode (UTF-8) or ASCII encodings, since we don't use
- // encodings for them but instead have our own native methods.
- func GetEncoding(charset string) encoding.Encoding {
- charset = strings.ToLower(charset)
- encodingLk.Lock()
- defer encodingLk.Unlock()
- if enc, ok := encodings[charset]; ok {
- return enc
- }
- switch encodingFallback {
- case EncodingFallbackASCII:
- return gencoding.ASCII
- case EncodingFallbackUTF8:
- return encoding.Nop
- }
- return nil
- }
- func init() {
- // We always support UTF-8 and ASCII.
- encodings = make(map[string]encoding.Encoding)
- encodings["utf-8"] = gencoding.UTF8
- encodings["utf8"] = gencoding.UTF8
- encodings["us-ascii"] = gencoding.ASCII
- encodings["ascii"] = gencoding.ASCII
- encodings["iso646"] = gencoding.ASCII
- }
|