diff --git a/sakura/sakura.vcxproj b/sakura/sakura.vcxproj
index 7d606076de..2fdc7ff9f2 100644
--- a/sakura/sakura.vcxproj
+++ b/sakura/sakura.vcxproj
@@ -415,6 +415,7 @@
+
@@ -783,6 +784,7 @@
+
diff --git a/sakura/sakura.vcxproj.filters b/sakura/sakura.vcxproj.filters
index 659a552187..26e57cd9d7 100644
--- a/sakura/sakura.vcxproj.filters
+++ b/sakura/sakura.vcxproj.filters
@@ -1112,6 +1112,9 @@
Cpp Source Files\recent
+
+ Cpp Source Files\extmodule
+
@@ -2306,6 +2309,9 @@
Cpp Source Files\convert
+
+ Cpp Source Files\extmodule
+
diff --git a/sakura_core/charset/icu4c/CharsetDetector.cpp b/sakura_core/charset/icu4c/CharsetDetector.cpp
index 78c3f7400f..50bb01bad9 100644
--- a/sakura_core/charset/icu4c/CharsetDetector.cpp
+++ b/sakura_core/charset/icu4c/CharsetDetector.cpp
@@ -28,8 +28,10 @@
CharsetDetector::CharsetDetector() noexcept
: _icuin()
, _csd(nullptr)
+ , _ud(nullptr) // no uchardet handle yet; Detect() creates one the first time it uses the uchardet backend
{
_icuin.InitDll();
+ _uchardet.InitDll(); // set up the uchardet wrapper as well; availability is queried later through IsAvailable()
}
CharsetDetector::~CharsetDetector() noexcept
@@ -37,32 +39,13 @@ CharsetDetector::~CharsetDetector() noexcept
if (_icuin.IsAvailable()) {
_icuin.ucsdet_close(_csd);
}
+ if (_uchardet.IsAvailable()) { // only call into uchardet when its wrapper reports itself available
+ _uchardet.uchardet_delete(_ud); // _ud is still nullptr if Detect() never created a handle - NOTE(review): confirm uchardet_delete accepts null
+ }
}
-ECodeType CharsetDetector::Detect(const std::string_view& bytes)
+static ECodeType name2code(std::string_view name)
{
- UErrorCode status = U_ZERO_ERROR;
-
- _csd = _icuin.ucsdet_open(&status);
- if (status != U_ZERO_ERROR) {
- return CODE_ERROR;
- }
-
- _icuin.ucsdet_setText(_csd, bytes.data(), bytes.length(), &status);
- if (status != U_ZERO_ERROR) {
- return CODE_ERROR;
- }
-
- const auto csm = _icuin.ucsdet_detect(_csd, &status);
- if (status != U_ZERO_ERROR) {
- return CODE_ERROR;
- }
-
- std::string_view name = _icuin.ucsdet_getName(csm, &status);
- if (status != U_ZERO_ERROR) {
- return CODE_ERROR;
- }
-
// 文字セット名⇒サクラエディタ内部コードの変換
if (name == "UTF-8") return CODE_UTF8;
if (name == "SHIFT_JIS") return CODE_SJIS;
@@ -72,6 +55,57 @@ ECodeType CharsetDetector::Detect(const std::string_view& bytes)
if (name == "ISO-2022-JP") return CODE_JIS;
if (name == "UTF-7") return CODE_UTF7;
if (name == "ISO-8859-1") return CODE_LATIN1;
+ // There are many more charsets from here on; how best to map them all is still undecided...
+ // https://www.freedesktop.org/wiki/Software/uchardet/
+ // https://docs.microsoft.com/en-us/windows/win32/intl/code-page-identifiers
+ if (name == "GB2312") return (ECodeType)936;
+ if (name == "BIG5") return (ECodeType)950;
+ if (name == "ISO-2022-KR") return (ECodeType)50225;
+ if (name == "GB18030") return (ECodeType)54936;
+ return CODE_ERROR;
+}
+
+/*!
+ * @brief Guesses the character set of a byte sequence.
+ *
+ * ICU (through _icuin) is used when it is available; uchardet (through
+ * _uchardet) is used only when ICU is not available. There is no fallback
+ * from ICU to uchardet when an ICU call fails.
+ *
+ * @param bytes raw bytes to examine
+ * @return the value name2code() maps the detected charset name to, or
+ *         CODE_ERROR when no backend is available, a backend call fails,
+ *         nothing is detected, or the detected name is not mapped
+ */
+ECodeType CharsetDetector::Detect(const std::string_view& bytes)
+{
+	if (_icuin.IsAvailable()) {
+		UErrorCode status = U_ZERO_ERROR;
+
+		// Open the ICU detector only once and reuse it on later calls.
+		// Re-opening on every call would overwrite _csd and leak the previous
+		// detector, because the destructor closes only the last one.
+		if (!_csd) {
+			_csd = _icuin.ucsdet_open(&status);
+			if (status != U_ZERO_ERROR || !_csd) {
+				return CODE_ERROR;
+			}
+		}
+
+		_icuin.ucsdet_setText(_csd, bytes.data(), bytes.length(), &status);
+		if (status != U_ZERO_ERROR) {
+			return CODE_ERROR;
+		}
+
+		// Per the ICU documentation ucsdet_detect may return null when no
+		// charset matches, so the match itself is checked as well.
+		const auto csm = _icuin.ucsdet_detect(_csd, &status);
+		if (status != U_ZERO_ERROR || !csm) {
+			return CODE_ERROR;
+		}
+
+		// Keep the raw pointer until it has been validated: constructing a
+		// std::string_view from a null pointer is undefined behaviour.
+		const char* name = _icuin.ucsdet_getName(csm, &status);
+		if (status != U_ZERO_ERROR || !name) {
+			return CODE_ERROR;
+		}
+		return name2code(name);
+	}
+	if (_uchardet.IsAvailable()) {
+		// The uchardet handle is created on first use and released in the destructor.
+		if (!_ud) {
+			_ud = _uchardet.uchardet_new();
+		}
+		if (!_ud) {
+			return CODE_ERROR;
+		}
+
+		ECodeType code = CODE_ERROR;
+		if (_uchardet.uchardet_handle_data(_ud, bytes.data(), bytes.length()) == 0) {
+			_uchardet.uchardet_data_end(_ud);
+			const char* name = _uchardet.uchardet_get_charset(_ud);
+			if (name) {
+				code = name2code(name);
+			}
+		}
+		// Reset on every path, including failure, so the reused handle never
+		// carries state from this input into the next call.
+		_uchardet.uchardet_reset(_ud);
+		return code;
+	}
return CODE_ERROR;
}
diff --git a/sakura_core/charset/icu4c/CharsetDetector.h b/sakura_core/charset/icu4c/CharsetDetector.h
index 23699a2601..1f36ff7d47 100644
--- a/sakura_core/charset/icu4c/CharsetDetector.h
+++ b/sakura_core/charset/icu4c/CharsetDetector.h
@@ -29,6 +29,7 @@
#include
#include "extmodule/CIcu4cI18n.h"
+#include "extmodule/CUchardet.h"
/*!
* @brief 文字コード検出クラス
@@ -38,12 +39,15 @@ class CharsetDetector final
CIcu4cI18n _icuin;
UCharsetDetector* _csd;
+ CUchardet _uchardet;
+ uchardet_t _ud;
+
public:
CharsetDetector() noexcept;
~CharsetDetector() noexcept;
bool IsAvailable() const noexcept {
- return _icuin.IsAvailable();
+ return _icuin.IsAvailable() || _uchardet.IsAvailable(); // true when at least one backend wrapper (ICU or uchardet) reports itself available
}
ECodeType Detect(const std::string_view& bytes);
diff --git a/sakura_core/extmodule/CUchardet.cpp b/sakura_core/extmodule/CUchardet.cpp
new file mode 100644
index 0000000000..5bcac38612
--- /dev/null
+++ b/sakura_core/extmodule/CUchardet.cpp
@@ -0,0 +1,58 @@
+/*! @file */
+/*
+ Copyright (C) 2018-2021, Sakura Editor Organization
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented;
+ you must not claim that you wrote the original software.
+ If you use this software in a product, an acknowledgment
+ in the product documentation would be appreciated but is
+ not required.
+
+ 2. Altered source versions must be plainly marked as such,
+ and must not be misrepresented as being the original software.
+
+ 3. This notice may not be removed or altered from any source
+ distribution.
+*/
+#include "StdAfx.h"
+#include "CUchardet.h"
+
+/*!
+ * @brief Returns the file name of the DLL to load.
+ *
+ * @param index candidate index; ignored, the same name is returned for every value
+ * @return L"uchardet.dll"
+ */
+LPCWSTR CUchardet::GetDllNameImp( [[maybe_unused]] int index )
+{
+	static constexpr wchar_t dllName[] = L"uchardet.dll";
+	return dllName;
+}
+
+/*!
+	Initializes the DLL wrapper.
+
+	Pairs every function-pointer member with the name of the exported
+	symbol it should receive and passes the list to RegisterEntries().
+
+	@retval true  success
+	@retval false failed to obtain an address
+*/
+bool CUchardet::InitDllImp()
+{
+	// Member pointer / exported symbol pairs; the list ends with a { NULL, 0 } entry.
+	const ImportTable entries[] = {
+		{ &_uchardet_new,         "uchardet_new" },
+		{ &_uchardet_delete,      "uchardet_delete" },
+		{ &_uchardet_handle_data, "uchardet_handle_data" },
+		{ &_uchardet_data_end,    "uchardet_data_end" },
+		{ &_uchardet_reset,       "uchardet_reset" },
+		{ &_uchardet_get_charset, "uchardet_get_charset" },
+		{ NULL, 0 }
+	};
+	return RegisterEntries(entries);
+}
+
diff --git a/sakura_core/extmodule/CUchardet.h b/sakura_core/extmodule/CUchardet.h
new file mode 100644
index 0000000000..cc3aee5ad7
--- /dev/null
+++ b/sakura_core/extmodule/CUchardet.h
@@ -0,0 +1,60 @@
+/*! @file */
+/*
+ Copyright (C) 2018-2021, Sakura Editor Organization
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented;
+ you must not claim that you wrote the original software.
+ If you use this software in a product, an acknowledgment
+ in the product documentation would be appreciated but is
+ not required.
+
+ 2. Altered source versions must be plainly marked as such,
+ and must not be misrepresented as being the original software.
+
+ 3. This notice may not be removed or altered from any source
+ distribution.
+*/
+#pragma once
+
+#include "CDllHandler.h"
+
+//! Opaque handle to a uchardet detector instance.
+using uchardet_t = struct uchardet *;
+
+/*!
+ * Wrapper class around the uchardet library (uchardet.dll).
+ *
+ * Each member function forwards to the function pointer of the same name
+ * (prefixed with an underscore). The pointers start out as nullptr and are
+ * handed to RegisterEntries() by InitDllImp(); the forwarding functions do
+ * not check them, so callers are expected to test IsAvailable() first.
+ */
+class CUchardet final : public CDllImp
+{
+public:
+	CUchardet() noexcept = default;
+
+	// Function pointers into the DLL
+	uchardet_t   (*_uchardet_new)() = nullptr;
+	void         (*_uchardet_delete)(uchardet_t ud) = nullptr;
+	int          (*_uchardet_handle_data)(uchardet_t ud, const char * data, size_t len) = nullptr;
+	void         (*_uchardet_data_end)(uchardet_t ud) = nullptr;
+	void         (*_uchardet_reset)(uchardet_t ud) = nullptr;
+	const char * (*_uchardet_get_charset)(uchardet_t ud) = nullptr;
+
+protected:
+	// CDllImp interface
+	LPCWSTR GetDllNameImp(int nIndex) override;
+	bool InitDllImp() override;
+
+public:
+	// Thin forwarders, one per imported function
+	auto uchardet_new() -> uchardet_t
+	{
+		return _uchardet_new();
+	}
+	auto uchardet_delete(uchardet_t ud) -> void
+	{
+		_uchardet_delete(ud);
+	}
+	auto uchardet_handle_data(uchardet_t ud, const char * data, size_t len) -> int
+	{
+		return _uchardet_handle_data(ud, data, len);
+	}
+	auto uchardet_data_end(uchardet_t ud) -> void
+	{
+		_uchardet_data_end(ud);
+	}
+	auto uchardet_reset(uchardet_t ud) -> void
+	{
+		_uchardet_reset(ud);
+	}
+	auto uchardet_get_charset(uchardet_t ud) -> const char *
+	{
+		return _uchardet_get_charset(ud);
+	}
+};
+
+