-
Notifications
You must be signed in to change notification settings - Fork 0
/
Utils.elm
89 lines (62 loc) · 1.89 KB
/
Utils.elm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
module Utils exposing (..)
import String
import Char
import String.Extra exposing (replace)
import List.Extra exposing (dropWhile)
-- Left fold over two lists in lockstep.
--
-- The step function receives one element from each list plus the running
-- accumulator. Elements are consumed from the front, and folding stops as
-- soon as the shorter list runs out; leftover elements of the longer list
-- are ignored.
foldl2 : (a -> b -> c -> c) -> c -> List a -> List b -> c
foldl2 f acc list1 list2 =
    -- Pairing the elements first yields one pending "accumulator -> accumulator"
    -- step per pair; List.map2 already truncates to the shorter list.
    List.map2 f list1 list2
        |> List.foldl (\step state -> step state) acc
-- Clamp a value from below: returns `num` when it is strictly greater than
-- `lowerBound`, otherwise returns `lowerBound`.
--
-- The first argument is the bound so that the function can be partially
-- applied, e.g. `atLeast 0`.
atLeast : comparable -> comparable -> comparable
atLeast lowerBound num =
    if num > lowerBound then
        num
    else
        lowerBound
-- True only for the plain space character (code 32).
-- Tabs, line breaks and other whitespace are deliberately not matched.
isSpace : Char -> Bool
isSpace x =
    x == ' '
-- Split a string into words on the space character only.
--
-- Unlike String.words, line breaks are not treated as separators, so they are
-- kept inside the resulting words. The result is a (List (List Char)) rather
-- than a (List String). Every space starts a new word, so consecutive spaces
-- produce empty words, and the empty string yields a single empty word.
words : String -> List (List Char)
words text =
    let
        -- Folding from the right: the head of `groups` is the word currently
        -- being built, and each character is prepended to it.
        step char groups =
            case groups of
                [] ->
                    -- Only here for exhaustiveness: the fold starts from
                    -- [ [] ] and every step returns a non-empty list.
                    []

                current :: rest ->
                    if isSpace char then
                        [] :: groups
                    else
                        (char :: current) :: rest
    in
    List.foldr step [ [] ] (String.toList text)
-- Flatten a list of words back into one list of characters, inserting a
-- single space character between each pair of adjacent words.
wordsToChars : List (List Char) -> List Char
wordsToChars wordList =
    wordList
        |> List.intersperse [ ' ' ]
        |> List.concat
-- Texts from Project Gutenberg have some metadata at the beginning. This
-- function removes it.
--
-- Working line by line, it:
--   1. drops everything before the first line containing "***",
--   2. drops that marker line itself,
--   3. drops the following lines for as long as they contain "Produced" or
--      are shorter than two characters,
--   4. joins the remaining lines back together with "\n".
--
-- If no line contains "***", nothing is left and the result is "".
dropHeaders : String -> String
dropHeaders text =
    let
        isBeforeMarker line =
            not (String.contains "***" line)

        isCreditOrBlank line =
            String.contains "Produced" line || String.length line < 2
    in
    text
        |> String.lines
        |> dropWhile isBeforeMarker
        |> List.drop 1
        |> dropWhile isCreditOrBlank
        |> String.join "\n"
-- Texts from Project Gutenberg have a new line every 70 chars. This function
-- removes those and only keeps the original paragraph breaks, which are
-- identified by consisting of two "\n" chars.
--
-- Each paragraph break ("\n\n") becomes a single "\x0D" character, and every
-- remaining single "\n" becomes a space.
--
-- The text is split and re-joined instead of being routed through a temporary
-- placeholder string, so input that happens to contain the placeholder text
-- (previously "$^") is passed through unchanged.
removeExtraNewLines : String -> String
removeExtraNewLines text =
    text
        |> String.split "\n\n"
        |> List.map (String.split "\n" >> String.join " ")
        |> String.join "\x0D"