Skip to content

Commit 437876d

Browse files
committed
Throw an exception for unsupported charsets
1 parent 09a8b77 commit 437876d

File tree

5 files changed

+108
-14
lines changed

5 files changed

+108
-14
lines changed

README.md

+10-2
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,21 @@ To include it for use in your project, please install via composer:
2020
composer require zbateson/mb-wrapper
2121
```
2222

23+
## PHP 7 Support Dropped
24+
25+
As of mb-wrapper 2.0, support for PHP 7 has been dropped.
26+
2327
## Requirements
2428

25-
mb-wrapper requires PHP 7.1 or newer. Tested on PHP 7.1, 7.2, 7.3, 7.4, 8.0, 8.1, and 8.2 on GitHub Actions.
29+
mb-wrapper requires PHP 8.0 or newer. Tested on PHP 8.0, 8.1, 8.2, and 8.3 on GitHub Actions.
30+
31+
## New in 2.0
32+
33+
If converting or performing an operation on a string fails in iconv, an UnsupportedCharsetException is now thrown.
2634

2735
## Description
2836

29-
MbWrapper is intended for use wherever mb_* or iconv_* is used. It scans supported charsets returned by mb_list_encodings(), and prefers mb_* functions, but will fallback to iconv if a charset isn't supported.
37+
MbWrapper is intended for use wherever mb_* or iconv_* is used. It scans supported charsets returned by mb_list_encodings(), and prefers mb_* functions, but will fall back to iconv if a charset isn't supported by the mb_* functions.
3038

3139
A list of aliased charsets is maintained for both mb_* and iconv, where a supported charset exists for an alias. This is useful for mail and http parsing as other systems may report encodings not recognized by mb_* or iconv.
3240

composer.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
}
1010
],
1111
"require": {
12-
"php": ">=7.1",
12+
"php": ">=8.0",
1313
"symfony/polyfill-mbstring": "^1.9",
1414
"symfony/polyfill-iconv": "^1.9"
1515
},

src/MbWrapper.php

+47-11
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
*
55
* @license http://opensource.org/licenses/bsd-license.php BSD
66
*/
7-
87
namespace ZBateson\MbWrapper;
98

109
/**
@@ -325,6 +324,38 @@ private function getNormalizedCharset($charset)
325324
return \preg_replace('/[^A-Z0-9]+/', '', $upper);
326325
}
327326

327+
private function iconv(string $fromCharset, string $toCharset, string $str) : string
328+
{
329+
$ret = @\iconv($fromCharset, $toCharset . '//TRANSLIT//IGNORE', $str);
330+
if ($ret === false) {
331+
throw new UnsupportedCharsetException("Unable to convert from charsets: $fromCharset to $toCharset");
332+
}
333+
return $ret;
334+
}
335+
336+
private function iconvStrlen(string $str, string $charset) : int
337+
{
338+
$ret = @\iconv_strlen($str, $charset . '//TRANSLIT//IGNORE');
339+
if ($ret === false) {
340+
throw new UnsupportedCharsetException("Charset $charset is not supported");
341+
}
342+
return $ret;
343+
}
344+
345+
private function iconvSubstr(string $str, string $charset, int $start, int $length = null) : string
346+
{
347+
$ret = @\iconv_substr($str, $start, $length, $charset . '//TRANSLIT//IGNORE');
348+
if ($ret === false) {
349+
$strLength = $this->iconvStrlen($str, $charset);
350+
if ($start > $strLength) {
351+
// returns empty to keep in line with mb_substr functionality
352+
return '';
353+
}
354+
throw new UnsupportedCharsetException("Charset $charset is not supported");
355+
}
356+
return $ret;
357+
}
358+
328359
/**
329360
* Converts the passed string's charset from the passed $fromCharset to the
330361
* passed $toCharset
@@ -333,6 +364,7 @@ private function getNormalizedCharset($charset)
333364
* back to iconv if not. If the source or destination character sets aren't
334365
* supported by iconv either, an UnsupportedCharsetException is thrown.
335366
*
367+
* @throws UnsupportedCharsetException if iconv fails
336368
*/
337369
public function convert(string $str, string $fromCharset, string $toCharset) : string
338370
{
@@ -347,16 +379,16 @@ public function convert(string $str, string $fromCharset, string $toCharset) : s
347379
if ($str !== '') {
348380
if ($from !== false && $to === false) {
349381
$str = \mb_convert_encoding($str, 'UTF-8', $from);
350-
return \iconv('UTF-8', $this->getIconvAlias($toCharset) . '//TRANSLIT//IGNORE', $str);
382+
return $this->iconv('UTF-8', $this->getIconvAlias($toCharset), $str);
351383
} elseif ($from === false && $to !== false) {
352-
$str = \iconv($this->getIconvAlias($fromCharset), 'UTF-8//TRANSLIT//IGNORE', $str);
384+
$str = $this->iconv($this->getIconvAlias($fromCharset), 'UTF-8', $str);
353385
return \mb_convert_encoding($str, $to, 'UTF-8');
354386
} elseif ($from !== false && $to !== false) {
355387
return \mb_convert_encoding($str, $to, $from);
356388
}
357-
return \iconv(
389+
return $this->iconv(
358390
$this->getIconvAlias($fromCharset),
359-
$this->getIconvAlias($toCharset) . '//TRANSLIT//IGNORE',
391+
$this->getIconvAlias($toCharset),
360392
$str
361393
);
362394
}
@@ -376,25 +408,32 @@ public function checkEncoding(string $str, string $charset) : bool
376408
return \mb_check_encoding($str, $mb);
377409
}
378410
$ic = $this->getIconvAlias($charset);
379-
return (@\iconv($ic, $ic, $str) !== false);
411+
return (@\iconv($ic, $ic . '//TRANSLIT//IGNORE', $str) !== false);
380412
}
381413

382414
/**
383415
* Uses either mb_strlen or iconv_strlen to return the number of characters
384416
* in the passed $str for the given $charset
417+
*
418+
* @throws UnsupportedCharsetException if iconv fails
385419
*/
386420
public function getLength(string $str, string $charset) : int
387421
{
388422
$mb = $this->getMbCharset($charset);
389423
if ($mb !== false) {
390424
return \mb_strlen($str, $mb);
391425
}
392-
return \iconv_strlen($str, $this->getIconvAlias($charset) . '//TRANSLIT//IGNORE');
426+
return $this->iconvStrlen($str, $this->getIconvAlias($charset));
393427
}
394428

395429
/**
396430
* Uses either mb_substr or iconv_substr to create and return a substring of
397431
* the passed $str.
432+
*
433+
* If the offset provided in $start is greater than the length of the
434+
* string, an empty string is returned.
435+
*
436+
* @throws UnsupportedCharsetException if iconv fails
398437
*/
399438
public function getSubstr(string $str, string $charset, int $start, ?int $length = null) : string
400439
{
@@ -409,10 +448,7 @@ public function getSubstr(string $str, string $charset, int $start, ?int $length
409448
$str = $this->convert($str, $ic, 'UTF-8');
410449
return $this->convert($this->getSubstr($str, 'UTF-8', $start, $length), 'UTF-8', $ic);
411450
}
412-
if ($length === null) {
413-
$length = \iconv_strlen($str, $ic . '//TRANSLIT//IGNORE');
414-
}
415-
return \iconv_substr($str, $start, $length, $ic . '//TRANSLIT//IGNORE');
451+
return $this->iconvSubstr($str, $ic, $start, $length);
416452
}
417453

418454
/**

src/UnsupportedCharsetException.php

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
<?php
2+
/**
3+
* This file is part of the ZBateson\MbWrapper project.
4+
*
5+
* @license http://opensource.org/licenses/bsd-license.php BSD
6+
*/
7+
8+
namespace ZBateson\MbWrapper;
9+
10+
use InvalidArgumentException;
11+
12+
/**
13+
* Exception thrown if MbWrapper can't convert from or to a specified charset.
14+
*
15+
* @author Zaahid Bateson
16+
*/
17+
class UnsupportedCharsetException extends InvalidArgumentException
18+
{
19+
}

tests/MbWrapper/MbWrapperTest.php

+31
Original file line numberDiff line numberDiff line change
@@ -204,4 +204,35 @@ public function testIconvSubstr() : void
204204

205205
}
206206
}
207+
208+
public function testConvertInvalidCharset() : void
209+
{
210+
$this->expectException(UnsupportedCharsetException::class);
211+
$test = 'This is my string';
212+
$converter = new MbWrapper();
213+
$converter->convert($converter->convert($test, 'UTF-8', 'ASDF-ABC-123'), 'ASDF-ABC-123', 'UTF-8');
214+
}
215+
216+
public function testLengthInvalidCharset() : void
217+
{
218+
$this->expectException(UnsupportedCharsetException::class);
219+
$test = 'This is my string';
220+
$converter = new MbWrapper();
221+
$converter->getLength($test, 'ASDF-ABC-123');
222+
}
223+
224+
public function testSubstrInvalidCharset() : void
225+
{
226+
$this->expectException(UnsupportedCharsetException::class);
227+
$test = 'This is my string';
228+
$converter = new MbWrapper();
229+
$converter->getSubstr($test, 'ASDF-ABC-123', 0);
230+
}
231+
232+
public function testSubstrInvalidOffset() : void
233+
{
234+
$test = 'Test';
235+
$converter = new MbWrapper();
236+
$this->assertEquals('', $converter->getSubstr($test, 'CP1250', 10));
237+
}
207238
}

0 commit comments

Comments
 (0)