Skip to content

Commit

Permalink
Bias generated chars (BurntSushi#99)
Browse files Browse the repository at this point in the history
This is how generated strings typically look now:

O���[.?

}'셥-�91(]ª!ñ�·��	#* "9ô�£´�:؀{乸0%㯓9똁⁔Rz릉¤tó£±�? (]>�
                                                <܏nf)*ᖯ'��ñ��°6>¦ó¤�¡匈�#$'`맽ô���c￸HX)�[r莅3*A ð¹�§7]

	G_媣<ꉟต8~^i7䱄釱fh)+��{G�0�

ﵽ❔K/5‴9[꤅X1J[M&4[؜¥"

⇉Ɩ©�42폨ĒUñ�¸�5.`'O§)⁣�-���*ñ·�¼‌r ؅
 '@/@�骲6!ñ�§��,&E؀ 
e?!�܏fó � ó¶±¬V�_ (]>el󯣿o+狪*="⁅
     ￸ñ���肖<{ó¿¿½\+巤

{T��*ô�¿½⁆?ó¿¿½ ꡯ칵쫨C}1<ʼn��*���..#ñ��º& J:,j=؂‹3“褙`}j¬ñ���+‌‐󾬲¦bO©￰S�ñ¡��~~�.�ª
=㍃�&f�E&Q@ð¾�±R�笹
⁁�D

6')�m9m�)�sqT�3H㹵0￸35蹈\>^鯅ñ��»ó��­�ó¨£�؅�‰쩻8 ⁋0�N\WGô�¡�¥�®��5UWñª���1钟[!�X��+<󿬹難"4​�ó�®³ᔵ"ó¬�®!G

揟’O�1'ñ�¿��+髾@$Zvó�¹�䵃�;ð»�¸�h뢚ស᜼9Yó¿¿¾_L蛇�AjpⰚ�㤩

©揪)ò�®�-d�A){¥攝剟>~ó���؃="

«ó¿¿½1賬‟z⁉�VOô�¿¾�2I!mô�´¿N4;,ñ�¾»i>-\B��)裉᷈�f륯  +ाX~9[u 樴m‿ñ°��!=�=�C[	ط57_£=‴�`⁧5�_�4}⁃‥�у灼	¥1:�>ð�©»

<$)>@, -"♄f<��ð¶¾�
  • Loading branch information
vi committed Dec 13, 2015
1 parent 43665bd commit 52c775a
Showing 1 changed file with 64 additions and 1 deletion.
65 changes: 64 additions & 1 deletion src/arbitrary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,70 @@ impl Arbitrary for String {
}

impl Arbitrary for char {
/// Produces an arbitrary `char` by delegating entirely to the generator's
/// own `gen` method; no biasing towards particular characters is applied.
fn arbitrary<G: Gen>(g: &mut G) -> char {
    g.gen()
}
/// Produces an arbitrary `char`, biased towards characters that tend to
/// expose bugs in text-handling code.
///
/// A selector is drawn with `g.gen_range(0, 100)` and picks one of six
/// strategies (percentages assume the upper bound of `gen_range` is
/// exclusive, which the final `unreachable!()` arm also presumes):
///
/// * 50% - a code point below `0xB0` (ASCII, control characters and the
///   start of Latin-1);
/// * 10% - any valid character of the Basic Multilingual Plane;
/// * 25% - whitespace, punctuation and digits common in programming
///   languages;
/// * 5%  - a hand-picked list of "tricky" Unicode characters;
/// * 5%  - a code point in `0x2000..0x2070`;
/// * 5%  - whatever the generator's own `gen` yields.
fn arbitrary<G: Gen>(g: &mut G) -> char {
    let mode = g.gen_range(0, 100);
    match mode {
        0...49 => {
            // ASCII, control characters and the first part of Latin-1.
            // No surrogate lies in this range, so the conversion cannot fail.
            char::from_u32(g.gen_range(0, 0xB0))
                .expect("all code points below 0xB0 are valid chars")
        }
        50...59 => {
            // Unicode BMP characters.
            loop {
                if let Some(x) = char::from_u32(g.gen_range(0, 0x10000)) {
                    return x
                }
                // `from_u32` rejected a surrogate code point
                // (U+D800..U+DFFF); draw again.
            }
        }
        60...84 => {
            // Characters often used in programming languages.
            // The space is listed three times to make it more likely.
            *g.choose(&[
                ' ', ' ', ' ',
                '\t',
                '\n',
                '~', '`', '!', '@', '#', '$', '%', '^', '&', '*', '(', ')',
                '_', '-', '=', '+','[', ']', '{', '}',':',';','\'','"','\\',
                '|',',','<','>','.','/','?',
                '0', '1','2','3','4','5','6','7','8','9',
            ]).unwrap()
        }
        85...89 => {
            // Tricky Unicode, part 1
            *g.choose(&[
                '\u{0149}', // a deprecated character
                '\u{fff0}', // the Specials block, U+FFF0..U+FFFF:
                '\u{fff1}','\u{fff2}','\u{fff3}','\u{fff4}','\u{fff5}',
                '\u{fff6}','\u{fff7}','\u{fff8}','\u{fff9}','\u{fffA}',
                '\u{fffB}','\u{fffC}','\u{fffD}','\u{fffE}','\u{fffF}',
                // some of the "Other, format" category:
                '\u{0600}','\u{0601}','\u{0602}','\u{0603}',
                '\u{0604}','\u{0605}','\u{061C}',
                '\u{06DD}','\u{070F}','\u{180E}',
                '\u{110BD}', '\u{1D173}',
                '\u{e0001}', // tag
                '\u{e0020}',// tag space
                // Private use: first two and last code point of the BMP
                // Private Use Area (U+E000..U+F8FF). The last entry used to
                // be U+EF8FF, which is not a private-use code point.
                '\u{e000}', '\u{e001}', '\u{f8ff}',
                // Boundaries of the supplementary private use planes 15
                // and 16, including their trailing noncharacters.
                '\u{f0000}', '\u{ffffd}','\u{ffffe}', '\u{fffff}',
                '\u{100000}','\u{10FFFD}','\u{10FFFE}','\u{10FFFF}',
                // "Other, surrogate" code points are so special that they
                // cannot even be represented as a `char` in safe Rust,
                // so they are omitted here.
                '\u{3000}', // ideographic space
                '\u{1680}',
                // Other space characters are already covered by the
                // branches that follow.
            ]).unwrap()
        }
        90...94 => {
            // Tricky Unicode, part 2: a code point in 0x2000..0x2070.
            // The range contains no surrogates, so `unwrap` cannot fail.
            char::from_u32(g.gen_range(0x2000, 0x2070)).unwrap()
        }
        95...99 => {
            // Completely arbitrary characters
            g.gen()
        }
        // `mode` is expected to lie in 0..100, all of which is matched above.
        _ => unreachable!()
    }
}

fn shrink(&self) -> Box<Iterator<Item=char>> {
Box::new((*self as u32).shrink().filter_map(char::from_u32))
Expand Down

0 comments on commit 52c775a

Please sign in to comment.