Skip to content

Commit

Permalink
codepage with controls characters
Browse files Browse the repository at this point in the history
  • Loading branch information
shirne committed Apr 1, 2022
1 parent 09676dc commit fa46cf9
Show file tree
Hide file tree
Showing 15 changed files with 73 additions and 23 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## 1.0.0

* CodePage with controls #1

## 0.2.4

* fix canEncode for shift-jis invalid codec
Expand Down
11 changes: 2 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,6 @@ main() {
}
```

## Getting Started
## Notice

This project is a starting point for a Dart
[package](https://flutter.dev/developing-packages/),
a library module containing code that can be shared easily across
multiple Flutter or Dart projects.

For help getting started with Flutter, view our
[online documentation](https://flutter.dev/docs), which offers tutorials,
samples, guidance on mobile development, and a full API reference.
0.x 版本中CodePage不含控制字符 (0x00-0x1F)
4 changes: 4 additions & 0 deletions analysis_options.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
include: package:flutter_lints/flutter.yaml

# Additional information about this file can be found at
# https://dart.dev/guides/language/analysis-options
20 changes: 19 additions & 1 deletion lib/src/code_page.dart
Original file line number Diff line number Diff line change
Expand Up @@ -316,8 +316,13 @@ const _noControls = "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"
"\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"
"\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD";

/// The 32 control characters U+0000 through U+001F, in code point order,
/// so that the character at index `i` has code unit `i`.
const _controls = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007"
    "\u0008\u0009\u000a\u000b\u000c\u000d\u000e\u000f"
    "\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017"
    "\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f";

/// ASCII characters, including the control characters 0x00-0x1F
/// (0x7F is left unmapped). Shared by many code pages.
const _ascii = "$_noControls"
const _ascii = "$_controls"
r""" !"#$%&'()*+,-./0123456789:;<=>?"""
r"@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_"
"`abcdefghijklmnopqrstuvwxyz{|}~\uFFFD";
Expand All @@ -327,7 +332,9 @@ const _ascii = "$_noControls"
/// A code page is a way to map bytes to character.
/// As such, it can only represent 256 different characters.
class CodePage extends Encoding {
@override
final CodePageDecoder decoder;
@override
final String name;
CodePageEncoder? _encoder;

Expand Down Expand Up @@ -378,13 +385,16 @@ class CodePage extends Encoding {
int operator [](int byte) => decoder._char(byte);

/// Encodes [input] using `encoder.convert`.
@override
Uint8List encode(String input, {int? invalidCharacter}) =>
encoder.convert(input, invalidCharacter: invalidCharacter);

/// Decodes [bytes] using `decoder.convert`.
@override
String decode(List<int> bytes, {bool allowInvalid = false}) =>
decoder.convert(bytes, allowInvalid: allowInvalid);

@override
CodePageEncoder get encoder => _encoder ??= decoder._createEncoder();
}

Expand All @@ -402,6 +412,7 @@ abstract class CodePageDecoder implements Converter<List<int>, String> {
/// or byte values not defined as a character in the code page,
/// are emitted as U+FFFD (the Unicode invalid character).
/// If not true, the bytes must be valid and defined characters.
@override
String convert(List<int> input, {bool allowInvalid = false});

CodePageEncoder _createEncoder();
Expand Down Expand Up @@ -442,6 +453,7 @@ class _NonBmpCodePageDecoder extends Converter<List<int>, String>
_NonBmpCodePageDecoder(String characters) : this._(_buildMapping(characters));
_NonBmpCodePageDecoder._(this._characters);

@override
int _char(int byte) => _characters[byte];

static Uint32List _buildMapping(String characters) {
Expand All @@ -461,6 +473,7 @@ class _NonBmpCodePageDecoder extends Converter<List<int>, String>
return result;
}

@override
CodePageEncoder _createEncoder() {
var result = <int, int>{};
for (var i = 0; i < 256; i++) {
Expand All @@ -472,6 +485,7 @@ class _NonBmpCodePageDecoder extends Converter<List<int>, String>
return CodePageEncoder._(result);
}

@override
String convert(List<int> input, {bool allowInvalid = false}) {
var buffer = Uint32List(input.length);
for (var i = 0; i < input.length; i++) {
Expand All @@ -493,8 +507,10 @@ class _BmpCodePageDecoder extends Converter<List<int>, String>
}
}

@override
int _char(int byte) => _characters.codeUnitAt(byte);

@override
String convert(List<int> bytes, {bool allowInvalid = false}) {
if (allowInvalid) return _convertAllowInvalid(bytes);
var count = bytes.length;
Expand Down Expand Up @@ -529,6 +545,7 @@ class _BmpCodePageDecoder extends Converter<List<int>, String>
return String.fromCharCodes(codeUnits);
}

@override
CodePageEncoder _createEncoder() => CodePageEncoder._bmp(_characters);
}

Expand Down Expand Up @@ -561,6 +578,7 @@ class CodePageEncoder extends Converter<String, List<int>> {
/// If [input] contains characters that are not available
/// in this code page, they are replaced by the [invalidCharacter] byte,
/// and then [invalidCharacter] must have been supplied.
@override
Uint8List convert(String input, {int? invalidCharacter}) {
if (invalidCharacter != null) {
RangeError.checkValueInInterval(
Expand Down
8 changes: 4 additions & 4 deletions lib/src/euc_jp.dart
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import 'dart:convert';
import 'dart:math';
import 'euc_jp_table.dart';

const eucJp = const EucJPCodec();
const eucJp = EucJPCodec();

class EucJPDecoder extends Converter<List<int>, String> {
const EucJPDecoder();
Expand Down Expand Up @@ -36,10 +36,10 @@ class EucJPDecoder extends Converter<List<int>, String> {
class EucJPEncoder extends Converter<String, List<int>> {
const EucJPEncoder();
@override
List<int> convert(String s) {
List<int> convert(String input) {
List<int> result = [];
for (int i = 0; i < s.length; i++) {
var bytes = utf8.encode(s[i]);
for (int i = 0; i < input.length; i++) {
var bytes = utf8.encode(input[i]);
var value = 0;

for (var i = 0, length = bytes.length; i < length; i++) {
Expand Down
2 changes: 2 additions & 0 deletions lib/src/euc_jp_table.dart
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
// ignore_for_file: constant_identifier_names

const EUC_TABLE = {
0x00: [0],
0x01: [1],
Expand Down
7 changes: 5 additions & 2 deletions lib/src/euc_kr.dart
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,13 @@ class EucKRCodec extends Encoding {

const EucKRCodec([this._allowInvalid = false]) : super();

@override
String get name => "euc-kr";

@override
EucKREncoder get encoder => const EucKREncoder();

@override
EucKRDecoder get decoder =>
_allowInvalid ? const EucKRDecoder(true) : const EucKRDecoder();
}
Expand All @@ -22,7 +25,7 @@ class EucKREncoder extends Converter<String, List<int>> {
@override
List<int> convert(String input) {
List<int> bits = [];
input.codeUnits.forEach((i) {
for (var i in input.codeUnits) {
if (i < 0x80) {
bits.add(i);
} else {
Expand All @@ -34,7 +37,7 @@ class EucKREncoder extends Converter<String, List<int>> {
bits.add(code & 0xff);
}
}
});
}
return bits;
}
}
Expand Down
2 changes: 2 additions & 0 deletions lib/src/gbk.dart
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
// ignore_for_file: constant_identifier_names, duplicate_ignore

import 'dart:convert';
import 'dart:typed_data';
import 'gbk_encoder_map.dart';
Expand Down
8 changes: 4 additions & 4 deletions lib/src/shift_jis.dart
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ class ShiftJISEncoder extends Converter<String, List<int>> {
const ShiftJISEncoder();

@override
List<int> convert(String s) {
List<int> convert(String input) {
List<int> result = [];
for (int i = 0; i < s.length; i++) {
var bytes = utf8.encode(s[i]);
for (int i = 0; i < input.length; i++) {
var bytes = utf8.encode(input[i]);
var value = 0;

for (var i = 0, length = bytes.length; i < length; i++) {
Expand Down Expand Up @@ -76,4 +76,4 @@ class ShiftJISCodec extends Encoding {
String get name => 'shift-jis';
}

const shiftJis = const ShiftJISCodec();
const shiftJis = ShiftJISCodec();
2 changes: 2 additions & 0 deletions lib/src/shift_jis_table.dart
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
// ignore_for_file: constant_identifier_names

const JIS_TABLE = {
0x00: [0],
0x01: [1],
Expand Down
2 changes: 1 addition & 1 deletion lib/src/utf/utf16.dart
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ class Utf16Decoder extends Converter<List<int>, String> {
}
}

const Utf16Codec utf16 = const Utf16Codec();
const Utf16Codec utf16 = Utf16Codec();

/// Identifies whether a List of bytes starts (based on offset) with a
/// byte-order marker (BOM).
Expand Down
2 changes: 1 addition & 1 deletion lib/src/utf/utf32.dart
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ class Utf32Decoder extends Converter<List<int>, String> {
.whereType<int>());
}

const Utf32Codec utf32 = const Utf32Codec();
const Utf32Codec utf32 = Utf32Codec();

/// Identifies whether a List of bytes starts (based on offset) with a
/// byte-order marker (BOM).
Expand Down
14 changes: 14 additions & 0 deletions pubspec.lock
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,13 @@ packages:
url: "https://pub.flutter-io.cn"
source: hosted
version: "6.1.2"
flutter_lints:
dependency: "direct dev"
description:
name: flutter_lints
url: "https://pub.flutter-io.cn"
source: hosted
version: "1.0.4"
frontend_server_client:
dependency: transitive
description:
Expand Down Expand Up @@ -127,6 +134,13 @@ packages:
url: "https://pub.flutter-io.cn"
source: hosted
version: "0.6.3"
lints:
dependency: transitive
description:
name: lints
url: "https://pub.flutter-io.cn"
source: hosted
version: "1.0.1"
logging:
dependency: transitive
description:
Expand Down
3 changes: 2 additions & 1 deletion pubspec.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: charset
description: Multiple charset Encoding and Decoding Library.
version: 0.2.4
version: 1.0.0
homepage:
repository: https://github.com/shirne/charset-dart
publish_to: https://pub.dartlang.org
Expand All @@ -12,6 +12,7 @@ dependencies:

dev_dependencies:
test:
flutter_lints: ^1.0.0

# For information on the generic Dart part of this file, see the
# following page: https://dart.dev/tools/pub/pubspec
7 changes: 7 additions & 0 deletions test/charset_test.dart
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
// ignore_for_file: avoid_print

import 'dart:convert';

import 'package:charset/charset.dart';
Expand All @@ -12,6 +14,11 @@ void main() {
utf16.encode("上善若水"), [254, 255, 78, 10, 85, 132, 130, 229, 108, 52]);
});

test('controls', () {
String textl = "0x0atdaa\t\rRsad";
expect(latin1.encode(textl), latin2.encode(textl));
});

test('test encode', () {
String toCheck = "that particularly stands out to me is \u0625\u0650"
"\u062C\u064E\u0651\u0627\u0635 (\u02BE\u0101\u1E63) \"pear\", suggested to have originated from Hebrew "
Expand Down

0 comments on commit fa46cf9

Please sign in to comment.