From fa46cf9fc9c6148726f8c09873525c60cc5bca96 Mon Sep 17 00:00:00 2001 From: shirne Date: Fri, 1 Apr 2022 21:51:06 +0800 Subject: [PATCH] codepage with controls characters --- CHANGELOG.md | 4 ++++ README.md | 11 ++--------- analysis_options.yaml | 4 ++++ lib/src/code_page.dart | 20 +++++++++++++++++++- lib/src/euc_jp.dart | 8 ++++---- lib/src/euc_jp_table.dart | 2 ++ lib/src/euc_kr.dart | 7 +++++-- lib/src/gbk.dart | 2 ++ lib/src/shift_jis.dart | 8 ++++---- lib/src/shift_jis_table.dart | 2 ++ lib/src/utf/utf16.dart | 2 +- lib/src/utf/utf32.dart | 2 +- pubspec.lock | 14 ++++++++++++++ pubspec.yaml | 3 ++- test/charset_test.dart | 7 +++++++ 15 files changed, 73 insertions(+), 23 deletions(-) create mode 100644 analysis_options.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index eaf13ea..3f7ec4b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.0.0 + +* CodePage with controls #1 + ## 0.2.4 * fix canEncode for shift-jis invalid codec diff --git a/README.md b/README.md index 6a24d4d..6a0ce0e 100644 --- a/README.md +++ b/README.md @@ -30,13 +30,6 @@ main() { } ``` -## Getting Started +## Notice -This project is a starting point for a Dart -[package](https://flutter.dev/developing-packages/), -a library module containing code that can be shared easily across -multiple Flutter or Dart projects. - -For help getting started with Flutter, view our -[online documentation](https://flutter.dev/docs), which offers tutorials, -samples, guidance on mobile development, and a full API reference. 
+0.x 版本中CodePage不含控制字符 (0x00-0x1F) \ No newline at end of file diff --git a/analysis_options.yaml b/analysis_options.yaml new file mode 100644 index 0000000..a5744c1 --- /dev/null +++ b/analysis_options.yaml @@ -0,0 +1,4 @@ +include: package:flutter_lints/flutter.yaml + +# Additional information about this file can be found at +# https://dart.dev/guides/language/analysis-options diff --git a/lib/src/code_page.dart b/lib/src/code_page.dart index 1cd77d7..f3e555c 100644 --- a/lib/src/code_page.dart +++ b/lib/src/code_page.dart @@ -316,8 +316,13 @@ const _noControls = "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"; +const _controls = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007" + "\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011" + "\u0012\u0013\u0014\u0015\u0016\u0017\u0018" + "\u0019\u001a\u001b\u001c\u001d\u001e\u001f"; + /// ASCII characters without control characters. Shared by many code pages. -const _ascii = "$_noControls" +const _ascii = "$_controls" r""" !"#$%&'()*+,-./0123456789:;<=>?""" r"@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_" "`abcdefghijklmnopqrstuvwxyz{|}~\uFFFD"; @@ -327,7 +332,9 @@ const _ascii = "$_noControls" /// A code page is a way to map bytes to character. /// As such, it can only represent 256 different characters. class CodePage extends Encoding { + @override final CodePageDecoder decoder; + @override final String name; CodePageEncoder? _encoder; @@ -378,13 +385,16 @@ class CodePage extends Encoding { int operator [](int byte) => decoder._char(byte); /// Encodes [input] using `encoder.convert`. + @override Uint8List encode(String input, {int? invalidCharacter}) => encoder.convert(input, invalidCharacter: invalidCharacter); /// Decodes [bytes] using `encoder.convert`. 
+ @override String decode(List bytes, {bool allowInvalid = false}) => decoder.convert(bytes, allowInvalid: allowInvalid); + @override CodePageEncoder get encoder => _encoder ??= decoder._createEncoder(); } @@ -402,6 +412,7 @@ abstract class CodePageDecoder implements Converter, String> { /// or byte values not defined as a character in the code page, /// are emitted as U+FFFD (the Unicode invalid character). /// If not true, the bytes must be calid and defined characters. + @override String convert(List input, {bool allowInvalid = false}); CodePageEncoder _createEncoder(); @@ -442,6 +453,7 @@ class _NonBmpCodePageDecoder extends Converter, String> _NonBmpCodePageDecoder(String characters) : this._(_buildMapping(characters)); _NonBmpCodePageDecoder._(this._characters); + @override int _char(int byte) => _characters[byte]; static Uint32List _buildMapping(String characters) { @@ -461,6 +473,7 @@ class _NonBmpCodePageDecoder extends Converter, String> return result; } + @override CodePageEncoder _createEncoder() { var result = {}; for (var i = 0; i < 256; i++) { @@ -472,6 +485,7 @@ class _NonBmpCodePageDecoder extends Converter, String> return CodePageEncoder._(result); } + @override String convert(List input, {bool allowInvalid = false}) { var buffer = Uint32List(input.length); for (var i = 0; i < input.length; i++) { @@ -493,8 +507,10 @@ class _BmpCodePageDecoder extends Converter, String> } } + @override int _char(int byte) => _characters.codeUnitAt(byte); + @override String convert(List bytes, {bool allowInvalid = false}) { if (allowInvalid) return _convertAllowInvalid(bytes); var count = bytes.length; @@ -529,6 +545,7 @@ class _BmpCodePageDecoder extends Converter, String> return String.fromCharCodes(codeUnits); } + @override CodePageEncoder _createEncoder() => CodePageEncoder._bmp(_characters); } @@ -561,6 +578,7 @@ class CodePageEncoder extends Converter> { /// If [input] contains characters that are not available /// in this code page, they are replaced by the 
[invalidCharacter] byte, /// and then [invalidCharacter] must have been supplied. + @override Uint8List convert(String input, {int? invalidCharacter}) { if (invalidCharacter != null) { RangeError.checkValueInInterval( diff --git a/lib/src/euc_jp.dart b/lib/src/euc_jp.dart index 6647ba5..12251f8 100644 --- a/lib/src/euc_jp.dart +++ b/lib/src/euc_jp.dart @@ -2,7 +2,7 @@ import 'dart:convert'; import 'dart:math'; import 'euc_jp_table.dart'; -const eucJp = const EucJPCodec(); +const eucJp = EucJPCodec(); class EucJPDecoder extends Converter, String> { const EucJPDecoder(); @@ -36,10 +36,10 @@ class EucJPDecoder extends Converter, String> { class EucJPEncoder extends Converter> { const EucJPEncoder(); @override - List convert(String s) { + List convert(String input) { List result = []; - for (int i = 0; i < s.length; i++) { - var bytes = utf8.encode(s[i]); + for (int i = 0; i < input.length; i++) { + var bytes = utf8.encode(input[i]); var value = 0; for (var i = 0, length = bytes.length; i < length; i++) { diff --git a/lib/src/euc_jp_table.dart b/lib/src/euc_jp_table.dart index cde18b5..be648e8 100644 --- a/lib/src/euc_jp_table.dart +++ b/lib/src/euc_jp_table.dart @@ -1,3 +1,5 @@ +// ignore_for_file: constant_identifier_names + const EUC_TABLE = { 0x00: [0], 0x01: [1], diff --git a/lib/src/euc_kr.dart b/lib/src/euc_kr.dart index c1781fe..f4aa932 100644 --- a/lib/src/euc_kr.dart +++ b/lib/src/euc_kr.dart @@ -8,10 +8,13 @@ class EucKRCodec extends Encoding { const EucKRCodec([this._allowInvalid = false]) : super(); + @override String get name => "euc-kr"; + @override EucKREncoder get encoder => const EucKREncoder(); + @override EucKRDecoder get decoder => _allowInvalid ? 
const EucKRDecoder(true) : const EucKRDecoder(); } @@ -22,7 +25,7 @@ class EucKREncoder extends Converter> { @override List convert(String input) { List bits = []; - input.codeUnits.forEach((i) { + for (var i in input.codeUnits) { if (i < 0x80) { bits.add(i); } else { @@ -34,7 +37,7 @@ class EucKREncoder extends Converter> { bits.add(code & 0xff); } } - }); + } return bits; } } diff --git a/lib/src/gbk.dart b/lib/src/gbk.dart index c7026b7..fb18543 100644 --- a/lib/src/gbk.dart +++ b/lib/src/gbk.dart @@ -1,3 +1,5 @@ +// ignore_for_file: constant_identifier_names, duplicate_ignore + import 'dart:convert'; import 'dart:typed_data'; import 'gbk_encoder_map.dart'; diff --git a/lib/src/shift_jis.dart b/lib/src/shift_jis.dart index cb9fbd0..a771d66 100644 --- a/lib/src/shift_jis.dart +++ b/lib/src/shift_jis.dart @@ -43,10 +43,10 @@ class ShiftJISEncoder extends Converter> { const ShiftJISEncoder(); @override - List convert(String s) { + List convert(String input) { List result = []; - for (int i = 0; i < s.length; i++) { - var bytes = utf8.encode(s[i]); + for (int i = 0; i < input.length; i++) { + var bytes = utf8.encode(input[i]); var value = 0; for (var i = 0, length = bytes.length; i < length; i++) { @@ -76,4 +76,4 @@ class ShiftJISCodec extends Encoding { String get name => 'shift-jis'; } -const shiftJis = const ShiftJISCodec(); +const shiftJis = ShiftJISCodec(); diff --git a/lib/src/shift_jis_table.dart b/lib/src/shift_jis_table.dart index 366ef67..e66ac19 100644 --- a/lib/src/shift_jis_table.dart +++ b/lib/src/shift_jis_table.dart @@ -1,3 +1,5 @@ +// ignore_for_file: constant_identifier_names + const JIS_TABLE = { 0x00: [0], 0x01: [1], diff --git a/lib/src/utf/utf16.dart b/lib/src/utf/utf16.dart index 18ebe0c..85964f4 100644 --- a/lib/src/utf/utf16.dart +++ b/lib/src/utf/utf16.dart @@ -117,7 +117,7 @@ class Utf16Decoder extends Converter, String> { } } -const Utf16Codec utf16 = const Utf16Codec(); +const Utf16Codec utf16 = Utf16Codec(); /// Identifies whether a 
List of bytes starts (based on offset) with a /// byte-order marker (BOM). diff --git a/lib/src/utf/utf32.dart b/lib/src/utf/utf32.dart index 37b0a05..6f223d9 100644 --- a/lib/src/utf/utf32.dart +++ b/lib/src/utf/utf32.dart @@ -117,7 +117,7 @@ class Utf32Decoder extends Converter, String> { .whereType()); } -const Utf32Codec utf32 = const Utf32Codec(); +const Utf32Codec utf32 = Utf32Codec(); /// Identifies whether a List of bytes starts (based on offset) with a /// byte-order marker (BOM). diff --git a/pubspec.lock b/pubspec.lock index 297a1c0..4dcfde9 100644 --- a/pubspec.lock +++ b/pubspec.lock @@ -85,6 +85,13 @@ packages: url: "https://pub.flutter-io.cn" source: hosted version: "6.1.2" + flutter_lints: + dependency: "direct dev" + description: + name: flutter_lints + url: "https://pub.flutter-io.cn" + source: hosted + version: "1.0.4" frontend_server_client: dependency: transitive description: @@ -127,6 +134,13 @@ packages: url: "https://pub.flutter-io.cn" source: hosted version: "0.6.3" + lints: + dependency: transitive + description: + name: lints + url: "https://pub.flutter-io.cn" + source: hosted + version: "1.0.1" logging: dependency: transitive description: diff --git a/pubspec.yaml b/pubspec.yaml index a7e8184..0f0118c 100644 --- a/pubspec.yaml +++ b/pubspec.yaml @@ -1,6 +1,6 @@ name: charset description: Multiple charset Encoding and Decoding Library. 
-version: 0.2.4 +version: 1.0.0 homepage: repository: https://github.com/shirne/charset-dart publish_to: https://pub.dartlang.org @@ -12,6 +12,7 @@ dependencies: dev_dependencies: test: + flutter_lints: ^1.0.0 # For information on the generic Dart part of this file, see the # following page: https://dart.dev/tools/pub/pubspec diff --git a/test/charset_test.dart b/test/charset_test.dart index 9c83aa4..2f86ba6 100644 --- a/test/charset_test.dart +++ b/test/charset_test.dart @@ -1,3 +1,5 @@ +// ignore_for_file: avoid_print + import 'dart:convert'; import 'package:charset/charset.dart'; @@ -12,6 +14,11 @@ void main() { utf16.encode("上善若水"), [254, 255, 78, 10, 85, 132, 130, 229, 108, 52]); }); + test('controls', () { + String textl = "0x0atdaa\t\rRsad"; + expect(latin1.encode(textl), latin2.encode(textl)); + }); + test('test encode', () { String toCheck = "that particularly stands out to me is \u0625\u0650" "\u062C\u064E\u0651\u0627\u0635 (\u02BE\u0101\u1E63) \"pear\", suggested to have originated from Hebrew "