Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .circleci/scripts/run-tests-cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

set -euo pipefail

pushd runtime/Cpp
ctest
popd

pushd runtime-testsuite
echo "running maven tests..."
if [ $GROUP == "LEXER" ]; then
Expand Down
8 changes: 6 additions & 2 deletions .github/scripts/run-tests-cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,18 @@

set -euo pipefail

cd runtime-testsuite/
pushd runtime/Cpp
ctest
popd

pushd runtime-testsuite
if [ $GROUP == "LEXER" ]; then
mvn -q -Dgroups="org.antlr.v4.test.runtime.category.LexerTests" -Dtest=cpp.** test
elif [ $GROUP == "PARSER" ]; then
mvn -q -Dgroups="org.antlr.v4.test.runtime.category.ParserTests" -Dtest=cpp.** test
elif [ $GROUP == "RECURSION" ]; then
mvn -q -Dgroups="org.antlr.v4.test.runtime.category.LeftRecursionTests" -Dtest=cpp.** test
else
mvn -q -Dtest=cpp.* test
mvn -q -Dtest=cpp.* test
fi
popd
15 changes: 10 additions & 5 deletions .travis/run-tests-cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,18 @@

set -euo pipefail

pushd runtime/Cpp
ctest
popd

pushd runtime-testsuite
if [ $GROUP == "LEXER" ]; then
mvn -q -Dgroups="org.antlr.v4.test.runtime.category.LexerTests" -Dtest=cpp.* test
mvn -q -Dgroups="org.antlr.v4.test.runtime.category.LexerTests" -Dtest=cpp.* test
elif [ $GROUP == "PARSER" ]; then
mvn -q -Dgroups="org.antlr.v4.test.runtime.category.ParserTests" -Dtest=cpp.* test
mvn -q -Dgroups="org.antlr.v4.test.runtime.category.ParserTests" -Dtest=cpp.* test
elif [ $GROUP == "RECURSION" ]; then
mvn -q -Dgroups="org.antlr.v4.test.runtime.category.LeftRecursionTests" -Dtest=cpp.* test
mvn -q -Dgroups="org.antlr.v4.test.runtime.category.LeftRecursionTests" -Dtest=cpp.* test
else
mvn -q -Dtest=cpp.* test
mvn -q -Dtest=cpp.* test
fi

popd
6 changes: 4 additions & 2 deletions runtime/Cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# -*- mode:cmake -*-
cmake_minimum_required (VERSION 2.8)
# 2.8 needed because of ExternalProject
cmake_minimum_required (VERSION 3.14)
# 3.14 needed because of FetchContent

enable_testing()

# Detect build type, fallback to release and throw a warning if use didn't specify any
if(NOT CMAKE_BUILD_TYPE)
Expand Down
15 changes: 0 additions & 15 deletions runtime/Cpp/deploy-macos.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,6 @@
rm -f -R antlr4-runtime build lib 2> /dev/null
rm antlr4-cpp-runtime-macos.zip 2> /dev/null

# Get utf8 dependency.
mkdir -p runtime/thirdparty 2> /dev/null
pushd runtime/thirdparty
if [ ! -d utfcpp ]
then
git clone https://github.com/nemtrif/utfcpp.git utfcpp
pushd utfcpp
git checkout tags/v3.1.1
popd
fi
popd

# Binaries
xcodebuild -project runtime/antlrcpp.xcodeproj \
-target antlr4 \
Expand All @@ -35,9 +23,6 @@ rm -f -R antlr4-runtime
pushd runtime/src
find . -name '*.h' | cpio -pdm ../../antlr4-runtime
popd
pushd runtime/thirdparty/utfcpp/source
find . -name '*.h' | cpio -pdm ../../../../antlr4-runtime
popd

# Zip up and clean up
zip -r antlr4-cpp-runtime-macos.zip antlr4-runtime lib
Expand Down
73 changes: 25 additions & 48 deletions runtime/Cpp/runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,45 +35,35 @@ add_custom_target(make_lib_output_dir ALL
add_dependencies(antlr4_shared make_lib_output_dir)
add_dependencies(antlr4_static make_lib_output_dir)

find_package(utf8cpp QUIET)
include(FetchContent)

set(INSTALL_utf8cpp FALSE)
FetchContent_Declare(
googletest
URL https://github.com/google/googletest/archive/e2239ee6043f73722e7aa812a459f54a28552929.zip
)

if (utf8cpp_FOUND)
target_link_libraries(antlr4_shared utf8cpp)
target_link_libraries(antlr4_static utf8cpp)
else()
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)

# older utf8cpp doesn't define the package above
find_path(utf8cpp_HEADER utf8.h
PATH_SUFFIXES utf8cpp
)
FetchContent_MakeAvailable(googletest)

if (utf8cpp_HEADER)
include_directories(${utf8cpp_HEADER})
else()
include(${CMAKE_ROOT}/Modules/ExternalProject.cmake)
set(THIRDPARTY_DIR ${CMAKE_BINARY_DIR}/runtime/thirdparty)
set(UTFCPP_DIR ${THIRDPARTY_DIR}/utfcpp)
ExternalProject_Add(
utf8cpp
GIT_REPOSITORY "https://github.com/nemtrif/utfcpp"
GIT_TAG "v3.1.1"
SOURCE_DIR ${UTFCPP_DIR}
UPDATE_DISCONNECTED 1
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${UTFCPP_DIR}/install -DUTF8_TESTS=off -DUTF8_SAMPLES=off
STEP_TARGETS build)

include_directories(
${UTFCPP_DIR}/install/include/utf8cpp
${UTFCPP_DIR}/install/include/utf8cpp/utf8
)

add_dependencies(antlr4_shared utf8cpp)
add_dependencies(antlr4_static utf8cpp)
set(INSTALL_utf8cpp TRUE)
endif()
endif()
file(GLOB libantlrcpp_TESTS
"${PROJECT_SOURCE_DIR}/runtime/tests/*.cpp"
)

add_executable(
antlr4_tests
${libantlrcpp_TESTS}
)

target_link_libraries(
antlr4_tests
antlr4_static
gtest_main
)

include(GoogleTest)

gtest_discover_tests(antlr4_tests)

if(CMAKE_SYSTEM_NAME MATCHES "Linux")
target_link_libraries(antlr4_shared ${UUID_LIBRARIES})
Expand Down Expand Up @@ -152,16 +142,3 @@ install(DIRECTORY "${PROJECT_SOURCE_DIR}/runtime/src/"
COMPONENT dev
FILES_MATCHING PATTERN "*.h"
)

if (INSTALL_utf8cpp)
install(FILES "${UTFCPP_DIR}/source/utf8.h"
DESTINATION "include/antlr4-runtime")
install(DIRECTORY "${UTFCPP_DIR}/source/utf8"
DESTINATION "include/antlr4-runtime"
COMPONENT dev
FILES_MATCHING PATTERN "*.h"
)
endif()



41 changes: 29 additions & 12 deletions runtime/Cpp/runtime/src/ANTLRInputStream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#include "misc/Interval.h"
#include "IntStream.h"

#include "support/StringUtils.h"
#include "support/Utf8.h"
#include "support/CPPUtils.h"

#include "ANTLRInputStream.h"
Expand All @@ -35,28 +35,37 @@ ANTLRInputStream::ANTLRInputStream(std::istream &stream): ANTLRInputStream() {
load(stream);
}

void ANTLRInputStream::load(const std::string &input) {
load(input.data(), input.size());
void ANTLRInputStream::load(const std::string &input, bool lenient) {
load(input.data(), input.size(), lenient);
}

void ANTLRInputStream::load(const char *data, size_t length) {
void ANTLRInputStream::load(const char *data, size_t length, bool lenient) {
// Remove the UTF-8 BOM if present.
const char *bom = "\xef\xbb\xbf";
if (length >= 3 && strncmp(data, bom, 3) == 0)
_data = antlrcpp::utf8_to_utf32(data + 3, data + length);
else
_data = antlrcpp::utf8_to_utf32(data, data + length);
if (length >= 3 && strncmp(data, bom, 3) == 0) {
data += 3;
length -= 3;
}
if (lenient) {
_data = Utf8::lenientDecode(std::string_view(data, length));
} else {
auto maybe_utf32 = Utf8::strictDecode(std::string_view(data, length));
if (!maybe_utf32.has_value()) {
throw IllegalArgumentException("UTF-8 string contains an illegal byte sequence");
}
_data = std::move(maybe_utf32).value();
}
p = 0;
}

void ANTLRInputStream::load(std::istream &stream) {
void ANTLRInputStream::load(std::istream &stream, bool lenient) {
if (!stream.good() || stream.eof()) // No fail, bad or EOF.
return;

_data.clear();

std::string s((std::istreambuf_iterator<char>(stream)), std::istreambuf_iterator<char>());
load(s.data(), s.length());
load(s.data(), s.length(), lenient);
}

void ANTLRInputStream::reset() {
Expand Down Expand Up @@ -144,7 +153,11 @@ std::string ANTLRInputStream::getText(const Interval &interval) {
return "";
}

return antlrcpp::utf32_to_utf8(_data.substr(start, count));
auto maybeUtf8 = Utf8::strictEncode(std::u32string_view(_data).substr(start, count));
if (!maybeUtf8.has_value()) {
throw IllegalArgumentException("Input stream contains invalid Unicode code points");
}
return std::move(maybeUtf8).value();
}

std::string ANTLRInputStream::getSourceName() const {
Expand All @@ -155,7 +168,11 @@ std::string ANTLRInputStream::getSourceName() const {
}

std::string ANTLRInputStream::toString() const {
return antlrcpp::utf32_to_utf8(_data);
auto maybeUtf8 = Utf8::strictEncode(_data);
if (!maybeUtf8.has_value()) {
throw IllegalArgumentException("Input stream contains invalid Unicode code points");
}
return std::move(maybeUtf8).value();
}

void ANTLRInputStream::InitializeInstanceFields() {
Expand Down
12 changes: 8 additions & 4 deletions runtime/Cpp/runtime/src/ANTLRInputStream.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ namespace antlr4 {
protected:
/// The data being scanned.
// UTF-32
UTF32String _data;
std::u32string _data;

/// 0..n-1 index into string of next char </summary>
size_t p;
Expand All @@ -34,9 +34,13 @@ namespace antlr4 {
ANTLRInputStream(const char *data, size_t length);
ANTLRInputStream(std::istream &stream);

virtual void load(const std::string &input);
virtual void load(const char *data, size_t length);
virtual void load(std::istream &stream);
virtual void load(const std::string &input, bool lenient);
virtual void load(const char *data, size_t length, bool lenient);
virtual void load(std::istream &stream, bool lenient);

virtual void load(const std::string &input) { load(input, false); }
virtual void load(const char *data, size_t length) { load(data, length, false); }
virtual void load(std::istream &stream) { load(stream, false); }

/// Reset the stream so that it's in the same state it was
/// when the object was created *except* the data array is not
Expand Down
8 changes: 6 additions & 2 deletions runtime/Cpp/runtime/src/UnbufferedCharStream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

#include "misc/Interval.h"
#include "Exceptions.h"
#include "support/StringUtils.h"
#include "support/Utf8.h"

#include "UnbufferedCharStream.h"

Expand Down Expand Up @@ -195,7 +195,11 @@ std::string UnbufferedCharStream::getText(const misc::Interval &interval) {
}
// convert from absolute to local index
size_t i = interval.a - bufferStartIndex;
return utf32_to_utf8(_data.substr(i, interval.length()));
auto maybeUtf8 = Utf8::strictEncode(std::u32string_view(_data).substr(i, interval.length()));
if (!maybeUtf8.has_value()) {
throw IllegalArgumentException("Unbuffered stream contains invalid Unicode code points");
}
return std::move(maybeUtf8).value();
}

size_t UnbufferedCharStream::getBufferStartIndex() const {
Expand Down
Loading