Skip to content

Commit b37847f

Browse files
committed
Add support for writing Windows-1252
1 parent f768a4c commit b37847f

File tree

7 files changed

+102
-14
lines changed

7 files changed

+102
-14
lines changed

.editorconfig

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ charset = utf-8
2727
insert_final_newline = true
2828
[*.testxt]
2929
charset = unset
30-
end_of_line = lf
30+
end_of_line = unset
3131
insert_final_newline = true
3232
indent_style = space
3333
indent_size = 4

include/libnex/base.h

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
/*
2+
base.h - contains basic general-purpose utility macros
3+
Copyright 2022 The NexNix Project
4+
5+
Licensed under the Apache License, Version 2.0 (the "License");
6+
you may not use this file except in compliance with the License.
7+
There should be a copy of the License distributed in a file named
8+
LICENSE, if not, you may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
17+
*/
18+
19+
/// @file base.h
20+
21+
#ifndef _BASE_H
22+
#define _BASE_H
23+
24+
// Marks a parameter (or any expression) as intentionally unused by casting it to void.
// NOTE: the expansion already ends with a semicolon, so writing `UNUSED (x);` yields an
// extra empty statement, and the macro cannot be the un-braced body of an `if` that is
// followed by `else`.
25+
#define UNUSED(param) (void) (param);
26+
27+
// Yields the number of elements in array `a`: the total size of `a` divided by the
// size of its first element. Only meaningful when `a` is a real array; if `a` is a
// pointer (e.g. an array parameter), the result is sizeof (pointer) / sizeof (element).
28+
#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
29+
30+
#endif

src/libnex_baremetal.h

-2
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,4 @@
2626
#include <libnex/endian.h>
2727
#include <libnex/safestring.h>
2828

29-
#define UNUSED(param) (void) (param);
30-
3129
#endif

src/libnex_hosted.h

-2
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,4 @@
3131
#include <libnex/safestring.h>
3232
#include <libnex/textstream.h>
3333

34-
#define UNUSED(param) (void) (param);
35-
3634
#endif

src/textstream.c

+42-6
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
#include "codepages/win1252toUtf32.h"
2222
#include <errno.h>
23+
#include <libnex/base.h>
2324
#include <libnex/bits.h>
2425
#include <libnex/safemalloc.h>
2526
#include <libnex/textstream.h>
@@ -175,7 +176,7 @@ PUBLIC void TextSetBufSz (TextStream_t* stream, size_t sz)
175176
}
176177

177178
// Checks if we reached a newline
178-
char _textCheckNewLine (wchar_t* buf, int i, size_t bytesLeft)
179+
char _textCheckNewLine (uint8_t* buf, int i, size_t bytesLeft)
179180
{
180181
if (buf[i] == '\n' || buf[i] == '\r')
181182
{
@@ -203,7 +204,7 @@ ssize_t _textDecode (TextStream_t* stream, wchar_t* buf, size_t count, int termi
203204
// Check if a terminator was reached
204205
if (terminator == 1)
205206
{
206-
char doWhat = _textCheckNewLine (buf, i, count);
207+
char doWhat = _textCheckNewLine (stream->buf, i, count);
207208
if (doWhat == 0)
208209
continue;
209210
else if (doWhat == 1)
@@ -234,7 +235,7 @@ ssize_t _textDecode (TextStream_t* stream, wchar_t* buf, size_t count, int termi
234235
// Check if a terminator was reached
235236
if (terminator == 1)
236237
{
237-
char doWhat = _textCheckNewLine (buf, i, count);
238+
char doWhat = _textCheckNewLine (stream->buf, i, count);
238239
if (doWhat == 0)
239240
continue;
240241
else if (doWhat == 1)
@@ -264,8 +265,43 @@ ssize_t _textEncode (TextStream_t* stream, wchar_t* buf, size_t count)
264265
return -1;
265266
}
266267
stream->buf[i] = (uint8_t) buf[i];
267-
charEncoded = (ssize_t) count;
268268
}
269+
charEncoded = (ssize_t) count;
270+
}
271+
else if (stream->encoding == TEXT_ENC_WIN1252)
272+
{
273+
// Loop and encode
274+
for (int i = 0; i < count; ++i)
275+
{
276+
// This is where the algorithm starts. Check if this character's Unicode code
277+
// is the same as its Windows-1252 one. If it is, directly copy to destination
278+
// buffer
279+
if (buf[i] <= 0x7F || (buf[i] >= 0xA0 && buf[i] <= 0xFF))
280+
{
281+
// Copy out
282+
stream->buf[i] = (uint8_t) buf[i];
283+
}
284+
// It's a Windows-1252 character
285+
else
286+
{
287+
// This is kind of slow, but the best way overall.
288+
// We loop through the translation table until we find character that matches
289+
// buf[i]. We set bit 7 on the index, and that's the character
290+
int tableSize = ARRAY_SIZE (win1252toUtf32);
291+
int tableIndex = 0;
292+
while (tableIndex < tableSize)
293+
{
294+
// Check for a match
295+
if (win1252toUtf32[tableIndex] == buf[i])
296+
{
297+
// Set bit 7 on tableIndex, and that is the character
298+
stream->buf[i] = BitSetNew (tableIndex, 7);
299+
}
300+
tableIndex++;
301+
}
302+
}
303+
}
304+
charEncoded = (ssize_t) count;
269305
}
270306
return charEncoded;
271307
}
@@ -303,7 +339,7 @@ PUBLIC ssize_t TextRead (TextStream_t* stream, wchar_t* buf, size_t count)
303339
// Read the data into the staging buffer
304340
ssize_t charRead = (ssize_t) fread (stream->buf, 1, count * sizeof (wchar_t), stream->file);
305341
// Decode the string
306-
ssize_t charParsed = _textDecode (stream, buf, charRead, 0);
342+
ssize_t charParsed = _textDecode (stream, buf, charRead, TEXT_DECODE_ALL);
307343
if (charParsed == -1)
308344
return -1;
309345
// That's it
@@ -345,7 +381,7 @@ PUBLIC ssize_t TextReadLine (TextStream_t* stream, wchar_t* buf, size_t count)
345381
// Read the data into the staging buffer
346382
ssize_t charRead = (ssize_t) fread (stream->buf, 1, count * sizeof (wchar_t), stream->file);
347383
// Decode the string
348-
ssize_t charParsed = _textDecode (stream, buf, charRead, 1);
384+
ssize_t charParsed = _textDecode (stream, buf, charRead, TEXT_DECODE_TERMINATE_ON_NEWLINE);
349385
// That's it
350386
TextUnlock (stream);
351387
return charParsed;

tests/testAscii1.testxt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
Test string. This is an ASCII document.
1+
Test string. This is an ASCII document.

tests/textstream.c

+28-2
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,21 @@ int main()
3737
return 1;
3838
// FIXME: If the C compiler doesn't use Unicode by default, these wcscmp calls may not work
3939
// Luckily, CL uses UTF-16 and Clang and GCC use UTF-32, making this work most of the time.
40-
wchar_t buf2[] = L"Test string. This is an ASCII document.\n";
40+
wchar_t buf2[] = L"Test string. This is an ASCII document.\r\n";
4141
TEST_BOOL (!wcscmp (buf, buf2), "reading ASCII");
4242
// Test TextSize
4343
TEST (TextSize (stream), wcslen (buf2), "TextSize");
4444
free (buf);
4545
TextClose (stream);
46+
// Test TextReadLine
47+
stream = TextOpen ("testAscii1.testxt", TEXT_MODE_READ, TEXT_ENC_ASCII, 0);
48+
buf = (wchar_t*) malloc_s (500 * sizeof (wchar_t));
49+
if (TextReadLine (stream, buf, 500) == -1)
50+
return 1;
51+
wchar_t buf5[] = L"Test string. This is an ASCII document.\r\n";
52+
TEST_BOOL (!wcscmp (buf, buf5), "reading a line of ASCII");
53+
TextClose (stream);
54+
free (buf);
4655
// Write out some text
4756
wchar_t buf3[] = L"This is a test document.\n";
4857
// Create a new file
@@ -70,11 +79,28 @@ int main()
7079
return 1;
7180
wchar_t* buf = (wchar_t*) malloc_s (500 * sizeof (wchar_t));
7281
if (TextRead (stream1, buf, 500) == -1)
73-
return -1;
82+
return 1;
7483
wchar_t buf2[] = L"Test windows 1252 document. Here is a non-ASCII character: ÿ Ž\n";
7584
TEST_BOOL (!wcscmp (buf, buf2), "reading Windows 1252");
7685
free (buf);
7786
TextClose (stream1);
87+
// Test writing it
88+
TextStream_t* stream2 = TextOpen ("testWin1252.testout", TEXT_MODE_WRITE, TEXT_ENC_WIN1252, 0);
89+
if (!stream2)
90+
return 1;
91+
if (TextWrite (stream2, buf2, wcslen (buf2)) == -1)
92+
return 1;
93+
TextClose (stream2);
94+
// Read and compare
95+
buf = (wchar_t*) malloc_s (500 * sizeof (wchar_t));
96+
stream2 = TextOpen ("testWin1252.testout", TEXT_MODE_READ, TEXT_ENC_WIN1252, 0);
97+
if (!stream2)
98+
return 1;
99+
if (TextRead (stream2, buf, wcslen (buf2)) == -1)
100+
return 1;
101+
TEST_BOOL (!wcscmp (buf, buf2), "writing Windows 1252");
102+
TextClose (stream2);
103+
free (buf);
78104
}
79105
return 0;
80106
}

0 commit comments

Comments
 (0)