@@ -188,7 +188,7 @@ class ExternalMerger(Merger):
188188 dict. Repeat this until all the items are combined.
189189
190190 - Before returning any items, it will load each partition and
191- combine them seperately . Yield them before loading next
191+ combine them separately. Yield them before loading next
192192 partition.
193193
194194 - During loading a partition, if the memory goes over limit,
@@ -197,7 +197,7 @@ class ExternalMerger(Merger):
197197
198198 `data` and `pdata` are used to hold the merged items in memory.
199199 At first, all the data are merged into `data`. Once the used
200- memory goes over limit, the items in `data` are dumped indo
200+ memory goes over limit, the items in `data` are dumped into
201201 disks, `data` will be cleared, all the rest of the items will be merged
202202 into `pdata` and then dumped into disks. Before returning, all
203203 the items in `pdata` will be dumped into disks.
@@ -235,7 +235,7 @@ def __init__(self, aggregator, memory_limit=512, serializer=None,
235235 self.batch = batch
236236 # scale is used to scale down the hash of key for recursive hash map
237237 self.scale = scale
238- # unpartitioned merged data
238+ # un-partitioned merged data
239239 self.data = {}
240240 # partitioned merged data, list of dicts
241241 self.pdata = []
@@ -334,7 +334,7 @@ def _spill(self):
334334
335335 for k, v in self.data.iteritems():
336336 h = self._partition(k)
337- # put one item in batch, make it compatitable with load_stream
337+ # put one item in batch, make it compatible with load_stream
338338 # it will increase the memory if dump them in batch
339339 self.serializer.dump_stream([(k, v)], streams[h])
340340
@@ -676,12 +676,12 @@ class ExternalGroupBy(ExternalMerger):
676676 If the data in one partition can be held in memory, then it
677677 will load and combine them in memory and yield.
678678
679- - If the dataset in one partittion cannot be hold in memory,
679+ - If the dataset in one partition cannot be held in memory,
680680 it will sort them first. If all the files are already sorted,
681681 it merges them by heapq.merge(), so it will do external sort
682682 for all the files.
683683
684- - After sorting, `GroupByKey` class will put all the continious
684+ - After sorting, `GroupByKey` class will put all the contiguous
685685 items with the same key as a group, yield the values as
686686 an iterator.
687687 """
0 commit comments