systemd/src/libsystemd-terminal/term-charset.c

/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright (C) 2014 David Herrmann <dh.herrmann@gmail.com>

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/

/*
 * VTE Character Sets
 * These are predefined charactersets that can be loaded into GL and GR. By
 * default we use unicode_lower and unicode_upper, that is, both sets have the
 * exact unicode mapping. unicode_lower is effectively ASCII and unicode_upper
 * as defined by the unicode standard (I guess, ISO 8859-1).
 * Several other character sets are defined here. However, all of them are
 * limited to the 96 character space of GL or GR. Everything beyond GR (which
 * was not supported by the classic VTs by DEC but is available in VT emulators
 * that support unicode/UTF8) is always mapped to unicode and cannot be changed
 * by these character sets. Even mapping GL and GR is only available for
 * backwards compatibility as new applications can use the Unicode functionality
 * of the VTE.
 *
 * Moreover, mapping GR is almost unnecessary to support. In fact, Unicode UTF-8
 * support in VTE works by reading every incoming data as UTF-8 stream. This
 * maps GL/ASCII to ASCII, as UTF-8 is backwards compatible to ASCII, however,
 * everything that has the 8th bit set is a >=2-byte haracter in UTF-8. That is,
 * this is in no way backwards compatible to >=VT220 8bit support. Therefore, if
 * someone maps a character set into GR and wants to use them with this VTE,
 * then they must already send UTF-8 characters to use GR (all GR characters are
 * 8-bits). Hence, they can easily also send the correct UTF-8 character for the
 * unicode mapping.
 * The only advantage is that most characters in many sets are 3-byte UTF-8
 * characters and by mapping the set into GR/GL you can use 2 or 1 byte UTF-8
 * characters which saves bandwidth.
 * Another reason is, if you have older applications that use the VT220 8-bit
 * support and you put a ASCII/8bit-extension to UTF-8 converter in between, you
 * need these mappings to have the application behave correctly if it uses GL/GR
 * mappings extensively.
 *
 * Anyway, we support GL/GR mappings so here are the most commonly used maps as
 * defined by Unicode-standard, DEC-private maps and other famous charmaps.
 *
 * Characters 1-32 are always the control characters (part of CL) and cannot be
 * mapped. Characters 34-127 (94 characters) are part of GL and can be mapped.
 * Characters 33 and 128 are not part of GL and always mapped by the VTE.
 * However, for GR they can be mapped differently (96 chars) so we have to
 * include them. The mapper has to take care not to use them in GL.
 */

#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include "term-internal.h"

/*
 * Lower Unicode character set. This maps the characters to the basic ASCII
 * characters 33-126. These are all graphics characters defined in ASCII.
 */
term_charset term_unicode_lower = {
        [0] = 32,
        [1] = 33,
        [2] = 34,
        [3] = 35,
        [4] = 36,
        [5] = 37,
        [6] = 38,
        [7] = 39,
        [8] = 40,
        [9] = 41,
        [10] = 42,
        [11] = 43,
        [12] = 44,
        [13] = 45,
        [14] = 46,
        [15] = 47,
        [16] = 48,
        [17] = 49,
        [18] = 50,
        [19] = 51,
        [20] = 52,
        [21] = 53,
        [22] = 54,
        [23] = 55,
        [24] = 56,
        [25] = 57,
        [26] = 58,
        [27] = 59,
        [28] = 60,
        [29] = 61,
        [30] = 62,
        [31] = 63,
        [32] = 64,
        [33] = 65,
        [34] = 66,
        [35] = 67,
        [36] = 68,
        [37] = 69,
        [38] = 70,
        [39] = 71,
        [40] = 72,
        [41] = 73,
        [42] = 74,
        [43] = 75,
        [44] = 76,
        [45] = 77,
        [46] = 78,
        [47] = 79,
        [48] = 80,
        [49] = 81,
        [50] = 82,
        [51] = 83,
        [52] = 84,
        [53] = 85,
        [54] = 86,
        [55] = 87,
        [56] = 88,
        [57] = 89,
        [58] = 90,
        [59] = 91,
        [60] = 92,
        [61] = 93,
        [62] = 94,
        [63] = 95,
        [64] = 96,
        [65] = 97,
        [66] = 98,
        [67] = 99,
        [68] = 100,
        [69] = 101,
        [70] = 102,
        [71] = 103,
        [72] = 104,
        [73] = 105,
        [74] = 106,
        [75] = 107,
        [76] = 108,
        [77] = 109,
        [78] = 110,
        [79] = 111,
        [80] = 112,
        [81] = 113,
        [82] = 114,
        [83] = 115,
        [84] = 116,
        [85] = 117,
        [86] = 118,
        [87] = 119,
        [88] = 120,
        [89] = 121,
        [90] = 122,
        [91] = 123,
        [92] = 124,
        [93] = 125,
        [94] = 126,
        [95] = 127,
};

/*
 * Upper Unicode Table
 * This maps all characters to the upper unicode characters 161-254. These are
 * not compatible to any older 8 bit character sets. See the Unicode standard
 * for the definitions of each symbol.
 */
term_charset term_unicode_upper = {
        [0] = 160,
        [1] = 161,
        [2] = 162,
        [3] = 163,
        [4] = 164,
        [5] = 165,
        [6] = 166,
        [7] = 167,
        [8] = 168,
        [9] = 169,
        [10] = 170,
        [11] = 171,
        [12] = 172,
        [13] = 173,
        [14] = 174,
        [15] = 175,
        [16] = 176,
        [17] = 177,
        [18] = 178,
        [19] = 179,
        [20] = 180,
        [21] = 181,
        [22] = 182,
        [23] = 183,
        [24] = 184,
        [25] = 185,
        [26] = 186,
        [27] = 187,
        [28] = 188,
        [29] = 189,
        [30] = 190,
        [31] = 191,
        [32] = 192,
        [33] = 193,
        [34] = 194,
        [35] = 195,
        [36] = 196,
        [37] = 197,
        [38] = 198,
        [39] = 199,
        [40] = 200,
        [41] = 201,
        [42] = 202,
        [43] = 203,
        [44] = 204,
        [45] = 205,
        [46] = 206,
        [47] = 207,
        [48] = 208,
        [49] = 209,
        [50] = 210,
        [51] = 211,
        [52] = 212,
        [53] = 213,
        [54] = 214,
        [55] = 215,
        [56] = 216,
        [57] = 217,
        [58] = 218,
        [59] = 219,
        [60] = 220,
        [61] = 221,
        [62] = 222,
        [63] = 223,
        [64] = 224,
        [65] = 225,
        [66] = 226,
        [67] = 227,
        [68] = 228,
        [69] = 229,
        [70] = 230,
        [71] = 231,
        [72] = 232,
        [73] = 233,
        [74] = 234,
        [75] = 235,
        [76] = 236,
        [77] = 237,
        [78] = 238,
        [79] = 239,
        [80] = 240,
        [81] = 241,
        [82] = 242,
        [83] = 243,
        [84] = 244,
        [85] = 245,
        [86] = 246,
        [87] = 247,
        [88] = 248,
        [89] = 249,
        [90] = 250,
        [91] = 251,
        [92] = 252,
        [93] = 253,
        [94] = 254,
        [95] = 255,
};

/*
 * The DEC supplemental graphics set. For its definition see here:
 *  http://vt100.net/docs/vt220-rm/table2-3b.html
 * Its basically a mixture of common European symbols that are not part of
 * ASCII. Most often, this is mapped into GR to extend the basci ASCII part.
 *
 * This is very similar to unicode_upper, however, few symbols differ so do not
 * mix them up!
 */
term_charset term_dec_supplemental_graphics = {
        [0] = -1,       /* undefined */
        [1] = 161,
        [2] = 162,
        [3] = 163,
        [4] = 0,
        [5] = 165,
        [6] = 0,
        [7] = 167,
        [8] = 164,
        [9] = 169,
        [10] = 170,
        [11] = 171,
        [12] = 0,
        [13] = 0,
        [14] = 0,
        [15] = 0,
        [16] = 176,
        [17] = 177,
        [18] = 178,
        [19] = 179,
        [20] = 0,
        [21] = 181,
        [22] = 182,
        [23] = 183,
        [24] = 0,
        [25] = 185,
        [26] = 186,
        [27] = 187,
        [28] = 188,
        [29] = 189,
        [30] = 0,
        [31] = 191,
        [32] = 192,
        [33] = 193,
        [34] = 194,
        [35] = 195,
        [36] = 196,
        [37] = 197,
        [38] = 198,
        [39] = 199,
        [40] = 200,
        [41] = 201,
        [42] = 202,
        [43] = 203,
        [44] = 204,
        [45] = 205,
        [46] = 206,
        [47] = 207,
        [48] = 0,
        [49] = 209,
        [50] = 210,
        [51] = 211,
        [52] = 212,
        [53] = 213,
        [54] = 214,
        [55] = 338,
        [56] = 216,
        [57] = 217,
        [58] = 218,
        [59] = 219,
        [60] = 220,
        [61] = 376,
        [62] = 0,
        [63] = 223,
        [64] = 224,
        [65] = 225,
        [66] = 226,
        [67] = 227,
        [68] = 228,
        [69] = 229,
        [70] = 230,
        [71] = 231,
        [72] = 232,
        [73] = 233,
        [74] = 234,
        [75] = 235,
        [76] = 236,
        [77] = 237,
        [78] = 238,
        [79] = 239,
        [80] = 0,
        [81] = 241,
        [82] = 242,
        [83] = 243,
        [84] = 244,
        [85] = 245,
        [86] = 246,
        [87] = 339,
        [88] = 248,
        [89] = 249,
        [90] = 250,
        [91] = 251,
        [92] = 252,
        [93] = 255,
        [94] = 0,
        [95] = -1,       /* undefined */
};

/*
 * DEC special graphics character set. See here for its definition:
 *  http://vt100.net/docs/vt220-rm/table2-4.html
 * This contains several characters to create ASCII drawings and similar. Its
 * commonly mapped into GR to extend the basic ASCII characters.
 *
 * Lower 62 characters map to ASCII 33-64, everything beyond is special and
 * commonly used for ASCII drawings. It depends on the Unicode Standard 3.2 for
 * the extended horizontal scan-line characters 3, 5, 7, and 9.
 */
term_charset term_dec_special_graphics = {
        [0] = -1,       /* undefined */
        [1] = 33,
        [2] = 34,
        [3] = 35,
        [4] = 36,
        [5] = 37,
        [6] = 38,
        [7] = 39,
        [8] = 40,
        [9] = 41,
        [10] = 42,
        [11] = 43,
        [12] = 44,
        [13] = 45,
        [14] = 46,
        [15] = 47,
        [16] = 48,
        [17] = 49,
        [18] = 50,
        [19] = 51,
        [20] = 52,
        [21] = 53,
        [22] = 54,
        [23] = 55,
        [24] = 56,
        [25] = 57,
        [26] = 58,
        [27] = 59,
        [28] = 60,
        [29] = 61,
        [30] = 62,
        [31] = 63,
        [32] = 64,
        [33] = 65,
        [34] = 66,
        [35] = 67,
        [36] = 68,
        [37] = 69,
        [38] = 70,
        [39] = 71,
        [40] = 72,
        [41] = 73,
        [42] = 74,
        [43] = 75,
        [44] = 76,
        [45] = 77,
        [46] = 78,
        [47] = 79,
        [48] = 80,
        [49] = 81,
        [50] = 82,
        [51] = 83,
        [52] = 84,
        [53] = 85,
        [54] = 86,
        [55] = 87,
        [56] = 88,
        [57] = 89,
        [58] = 90,
        [59] = 91,
        [60] = 92,
        [61] = 93,
        [62] = 94,
        [63] = 0,
        [64] = 9830,
        [65] = 9618,
        [66] = 9225,
        [67] = 9228,
        [68] = 9229,
        [69] = 9226,
        [70] = 176,
        [71] = 177,
        [72] = 9252,
        [73] = 9227,
        [74] = 9496,
        [75] = 9488,
        [76] = 9484,
        [77] = 9492,
        [78] = 9532,
        [79] = 9146,
        [80] = 9147,
        [81] = 9472,
        [82] = 9148,
        [83] = 9149,
        [84] = 9500,
        [85] = 9508,
        [86] = 9524,
        [87] = 9516,
        [88] = 9474,
        [89] = 8804,
        [90] = 8805,
        [91] = 960,
        [92] = 8800,
        [93] = 163,
        [94] = 8901,
        [95] = -1,      /* undefined */
};