Skip to content

Commit f708dca

Browse files
committed
Allow for UTF-16 files to be read
1 parent a7c544f commit f708dca

File tree

8 files changed

+292
-27
lines changed

8 files changed

+292
-27
lines changed

CMakeLists.txt

-2
Original file line numberDiff line numberDiff line change
@@ -149,8 +149,6 @@ list(APPEND LIBNEX_INCLUDE_DIRS ${CMAKE_SOURCE_DIR}/include ${CMAKE_BINARY_DIR}
149149
# Create the library
150150
add_library(nex ${LIBNEX_SOURCES})
151151

152-
# Set compiler flags
153-
154152
# Set SOName
155153
set_target_properties(nex PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION})
156154
# Include the directories

NOTICE

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Many files in this directory are original works of
2+
The NexNix Project. The NexNix Project consists of the following developers:
3+
4+
Jedidiah Thompson

include/libnex/bits.h

+10
Original file line numberDiff line numberDiff line change
@@ -120,4 +120,14 @@
120120
*/
121121
#define BitGetRange(set, start, count) (((set) >> (start)) & ((1 << (count)) - 1))
122122

123+
/**
124+
* @brief Masks off certain bits
125+
*
126+
* BitMask() returns those bits a specified set has in common with a specified mask
127+
* @param set the value to mask in
128+
* @param mask the bit mask
129+
* @return the bits in common
130+
*/
131+
#define BitMask(set, mask) ((set) & (mask))
132+
123133
#endif

include/libnex/textstream.h

+24-3
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
#include <libnex/decls.h>
2525
#include <libnex/object.h>
26+
#include <stdbool.h>
2627
#include <stddef.h>
2728
#include <stdint.h>
2829
#include <stdio.h>
@@ -49,8 +50,11 @@
4950
#define TEXT_SYS_ERROR 2 ///< errno contains the error
5051
#define TEXT_INVALID_PARAMETER 3 ///< User passed an invalid parameter
5152
#define TEXT_BAD_BOM 4 ///< A bad BOM was encountered
52-
#define TEXT_NARROW_WCHAR 5 ///< Attempting to parse UTF-32 on a system with a narrow wchar_t
53+
#define TEXT_NARROW_WCHAR 5 ///< Attempting to parse UTF-32 on system with narrow wchar_t
5354
#define TEXT_INVALID_CHAR 6 ///< Character doesn't fit in destination character set
55+
#define TEXT_BUF_TOO_SMALL 7 ///< Character won't fit in buffer
56+
#define TEXT_NO_SURROGATE \
57+
8 ///< User specified that no surrogates should be expanded. Only affects systems where sizeof(wchar_t) == 2
5458

5559
__DECL_START
5660

@@ -70,6 +74,8 @@ typedef struct _TextStream
7074
size_t bufSize; ///< Size of above buffer (defaults to 512 bytes)
7175
char encoding; ///< Underlying encoding of the stream
7276
char order; ///< Order of bytes for multi byte character sets
77+
bool expandSur; ///< Whether surrogate pairs should be expanded.
78+
char encSize; ///< Size of one char in the encoding
7379
} TextStream_t;
7480

7581
/**
@@ -104,12 +110,18 @@ PUBLIC void TextClose (TextStream_t* stream);
104110
* TextRead takes a buffer, character count, and stream object,
105111
* and reads / decodes count codepoints into buf from stream.
106112
* Data is initially read into a staging buffer, and then the staging buffer is
107-
* decoded into the main buffer specified by buf
113+
* decoded into the main buffer specified by buf.
114+
*
115+
* WARNING: If you are on a platform where sizeof(wchar_t) == 2 and you are decoding a UTF-16 stream,
116+
* you SHALL make buf's size equal to the number of characters you want to decode times 2.
117+
* This is in case buf contains surrogate pairs; those are copied as one character and take 4 bytes.
118+
* Ensure count is equal to the number of characters, NOT the size you malloc'ed.
119+
* Else, TextRead will fail with error TEXT_BUF_TOO_SMALL
108120
*
109121
* @param[in] stream the stream to read from
110122
* @param[out] buf a buffer of wchar_t's to decode into
111123
* @param[in] count the number of wchar_t's to decode
112-
* @param[out] the number or characters read
124+
* @param[out] charsRead the number of characters read
113125
* @return a result code
114126
*/
115127
PUBLIC short TextRead (TextStream_t* stream, wchar_t* buf, const size_t count, size_t* charsRead);
@@ -169,6 +181,13 @@ PUBLIC long TextSize (TextStream_t* stream);
169181
*/
170182
PUBLIC void TextSetBufSz (TextStream_t* stream, size_t sz);
171183

184+
/**
185+
* @brief Returns a textual representation of code
186+
* @param code the error code to turn into a string
187+
* @return the string message
188+
*/
189+
PUBLIC const char* TextError (int code);
190+
172191
__DECL_END
173192

174193
// Helper macros
@@ -179,5 +198,7 @@ __DECL_END
179198
#define TextLock(item) (ObjLock (&(item)->obj)) ///< Locks this stream
180199
#define TextUnlock(item) (ObjUnlock (&(item)->obj)) ///< Unlocks the stream
181200
#define TextDeRef(item) (ObjDestroy (&(item)->obj)) ///< Dereferences this stream
201+
#define TextNoSurroate(stream) \
202+
((stream)->expandSur = false) ///< Necessary if the user doesn't want to deal with surrogate pairs
182203

183204
#endif

0 commit comments

Comments
 (0)