forked from neomutt/neomutt
-
Notifications
You must be signed in to change notification settings - Fork 0
/
mbyte.c
113 lines (100 loc) · 2.84 KB
/
mbyte.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
/**
* @file
* Convert strings between multibyte and utf8 encodings
*
* @authors
* Copyright (C) 2000 Edmund Grimley Evans <[email protected]>
*
* @copyright
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 2 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* Japanese support by TAKIZAWA Takashi <[email protected]>.
*/
#include "config.h"
#include <errno.h>
#include <limits.h>
#include <stdbool.h>
#include <string.h>
#include <wchar.h>
#include "mbyte.h"
#include "charset.h"
#include "lib/lib.h"
#include "options.h"
#include "protos.h"
#ifndef EILSEQ
#define EILSEQ EINVAL
#endif
/**
 * Charset_is_utf8 - Non-zero when the active character set is UTF-8
 *
 * Starts out as 0.  mutt_set_charset() sets it to 1 when the canonical form
 * of the charset name it is given passes mutt_is_utf8(), and to 0 otherwise.
 */
int Charset_is_utf8 = 0;
/**
 * mutt_set_charset - Record which character set is in use
 * @param charset Name of the character set
 *
 * The name is canonicalised with mutt_canonical_charset() and the result is
 * used to update Charset_is_utf8.  When built with both
 * HAVE_BIND_TEXTDOMAIN_CODESET and ENABLE_NLS, the canonical name is also
 * passed to bind_textdomain_codeset() for PACKAGE.
 */
void mutt_set_charset(char *charset)
{
  char canonical[STRING];

  mutt_canonical_charset(canonical, sizeof(canonical), charset);

  /* The flag is always rewritten: 1 for UTF-8, 0 for anything else */
  Charset_is_utf8 = mutt_is_utf8(canonical) ? 1 : 0;

#if defined(HAVE_BIND_TEXTDOMAIN_CODESET) && defined(ENABLE_NLS)
  bind_textdomain_codeset(PACKAGE, canonical);
#endif
}
/**
 * replacement_char - Choose a stand-in character for the current charset
 * @retval 0xfffd If Charset_is_utf8 is non-zero
 * @retval '?'    Otherwise
 */
wchar_t replacement_char(void)
{
  if (Charset_is_utf8)
    return 0xfffd;

  return '?';
}
/**
 * is_display_corrupting_utf8 - Is this one of the code points known to upset the display?
 * @param wc Wide character to test
 * @retval true  @a wc is one of the listed markers
 * @retval false @a wc is anything else
 */
bool is_display_corrupting_utf8(wchar_t wc)
{
  /* Individual code points */
  switch (wc)
  {
    case 0x200e: /* bidi markers: #3827 */
    case 0x200f:
    case 0x00ad: /* soft hyphen: #3848 */
    case 0xfeff: /* zero width no-break space */
      return true;
    default:
      break;
  }

  /* misc directional markers */
  if ((wc >= (wchar_t) 0x2066) && (wc <= (wchar_t) 0x2069))
    return true;

  /* misc directional markers: #3854 */
  return (wc >= (wchar_t) 0x202a) && (wc <= (wchar_t) 0x202e);
}
/**
 * mutt_filter_unprintable - Rewrite a string without unprintable characters
 * @param[in,out] s Address of the multibyte string to filter
 * @retval  0 Success; *s has been replaced by the filtered string
 * @retval -1 @a s is NULL, or mutt_buffer_new() failed (*s is left untouched)
 *
 * The string is decoded one character at a time:
 * - an invalid or incomplete sequence consumes one byte and yields
 *   replacement_char()
 * - a character failing IsWPrint() becomes '?'
 * - when Charset_is_utf8 is set, characters matching
 *   is_display_corrupting_utf8() are dropped
 * - a character that cannot be re-encoded is dropped
 *
 * The old *s is released with FREE() and *s always ends up non-NULL (an empty
 * string if nothing was emitted).
 */
int mutt_filter_unprintable(char **s)
{
  struct Buffer *b = NULL;
  wchar_t wc = 0;
  size_t k, k2;
  char scratch[MB_LEN_MAX + 1];
  char *p = NULL;
  mbstate_t mbstate1, mbstate2;

  if (!s)
    return -1;
  p = *s;

  if (!(b = mutt_buffer_new()))
    return -1;

  memset(&mbstate1, 0, sizeof(mbstate1));
  memset(&mbstate2, 0, sizeof(mbstate2));

  /* mbrtowc() returns 0 at the terminating NUL, which ends the loop */
  for (; (k = mbrtowc(&wc, p, MB_LEN_MAX, &mbstate1)); p += k)
  {
    if (k == (size_t)(-1) || k == (size_t)(-2))
    {
      /* Bad or truncated sequence: step over a single byte and restart the
       * decoder from a clean state */
      k = 1;
      memset(&mbstate1, 0, sizeof(mbstate1));
      wc = replacement_char();
    }

    if (!IsWPrint(wc))
      wc = '?';
    else if (Charset_is_utf8 && is_display_corrupting_utf8(wc))
      continue;

    k2 = wcrtomb(scratch, wc, &mbstate2);
    if (k2 == (size_t)(-1))
    {
      /* wc has no encoding in the current locale.  Using the error value as
       * an index would write far outside scratch[], so drop the character.
       * The conversion state is unspecified after a failure; reset it. */
      memset(&mbstate2, 0, sizeof(mbstate2));
      continue;
    }
    scratch[k2] = '\0';
    mutt_buffer_addstr(b, scratch);
  }

  FREE(s);
  /* Hand the accumulated data over to the caller.  If nothing was added,
   * b->data may be NULL, so give back an empty string instead. */
  *s = b->data ? b->data : safe_calloc(1, 1);
  /* NOTE(review): presumably this frees only the Buffer struct, not the data
   * just handed to the caller -- confirm against the FREE() definition */
  FREE(&b);

  return 0;
}