From 88bc11e646c8d1db7ac7894f74e2f660d9a82c54 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 17 Sep 2013 19:42:07 -0700 Subject: [PATCH] Document a few undocumented modules in libstd Hopefull this will make our libstd docs appear a little more "full". --- src/libstd/c_str.rs | 52 ++++++++++++ src/libstd/condition.rs | 175 ++++++++++++++++++++++++++++++++-------- src/libstd/fmt/mod.rs | 17 ++-- src/libstd/iter.rs | 55 ++++++++++++- src/libstd/str.rs | 86 ++++++++++++++++++-- src/libstd/vec.rs | 2 + 6 files changed, 336 insertions(+), 51 deletions(-) diff --git a/src/libstd/c_str.rs b/src/libstd/c_str.rs index 75598b300a371..0e623f33ff700 100644 --- a/src/libstd/c_str.rs +++ b/src/libstd/c_str.rs @@ -8,6 +8,58 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. +/*! + +C-string manipulation and management + +This module provides the basic methods for creating and manipulating +null-terminated strings for use with FFI calls (back to C). Most C APIs require +that the string being passed to them is null-terminated, and by default rust's +string types are *not* null terminated. + +The other problem with translating Rust strings to C strings is that Rust +strings can validly contain a null-byte in the middle of the string (0 is a +valid unicode codepoint). This means that not all Rust strings can actually be +translated to C strings. + +# Creation of a C string + +A C string is managed through the `CString` type defined in this module. It +"owns" the internal buffer of characters and will automatically deallocate the +buffer when the string is dropped. The `ToCStr` trait is implemented for `&str` +and `&[u8]`, but the conversions can fail due to some of the limitations +explained above. + +This also means that currently whenever a C string is created, an allocation +must be performed to place the data elsewhere (the lifetime of the C string is +not tied to the lifetime of the original string/data buffer). 
If C strings are +heavily used in applications, then caching may be advisable to prevent +unnecessary amounts of allocations. + +An example of creating and using a C string would be: + +~~~{.rust} +use std::libc; +externfn!(fn puts(s: *libc::c_char)) + +let my_string = "Hello, world!"; + +// Allocate the C string with an explicit local that owns the string. The +// `c_buffer` pointer will be deallocated when `my_c_string` goes out of scope. +let my_c_string = my_string.to_c_str(); +do my_c_string.with_ref |c_buffer| { + unsafe { puts(c_buffer); } +} + +// Don't save off the allocation of the C string, the `c_buffer` will be +// deallocated when this block returns! +do my_string.with_c_str |c_buffer| { + unsafe { puts(c_buffer); } +} +~~~ + +*/ + use cast; use iter::{Iterator, range}; use libc; diff --git a/src/libstd/condition.rs b/src/libstd/condition.rs index 954b8bd73300c..c47dcfe3de69a 100644 --- a/src/libstd/condition.rs +++ b/src/libstd/condition.rs @@ -8,71 +8,179 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! Condition handling */ +/*! -#[allow(missing_doc)]; +Condition handling + +Conditions are a utility used to deal with handling error conditions. The syntax +of a condition handler strikes a resemblance to try/catch blocks in other +languages, but condition handlers are *not* a form of exception handling in the +same manner. + +A condition is declared through the `condition!` macro provided by the compiler: + +~~~{.rust} +condition! { + pub my_error: int -> ~str; +} +~~~ + +This macro declares an inner module called `my_error` with one static variable, +`cond` that is a static `Condition` instance. To help understand what the other +parameters are used for, an example usage of this condition would be: + +~~~{.rust} +do my_error::cond.trap(|raised_int| { + + // the condition `my_error` was raised on, and the value it raised is stored + // in `raised_int`. 
This closure must return a `~str` type (as specified in + // the declaration of the condition) + if raised_int == 3 { ~"three" } else { ~"oh well" } + +}).inside { + + // The condition handler above is installed for the duration of this block. + // That handler will override any previous handler, but the previous handler + // is restored when this block returns (handlers nest) + // + // If any code from this block (or code from another block) raises on the + // condition, then the above handler will be invoked (so long as there's no + // other nested handler). + + println(my_error::cond.raise(3)); // prints "three" + println(my_error::cond.raise(4)); // prints "oh well" + +} +~~~ + +Condition handling is useful in cases where propagating errors is either too +cumbersome or just not necessary in the first place. It should also be noted, +though, that if there is no handler installed when a condition is raised, then +the task invokes `fail!()` and will terminate. + +## More Info + +Condition handlers as an error strategy are well explained in the [conditions +tutorial](http://static.rust-lang.org/doc/master/tutorial-conditions.html), +along with comparing and contrasting them with other error handling strategies. + +*/ use local_data; use prelude::*; +use unstable::raw::Closure; -// helper for transmutation, shown below. -type RustClosure = (int, int); - +#[doc(hidden)] pub struct Handler { - handle: RustClosure, - prev: Option<@Handler>, + priv handle: Closure, + priv prev: Option<@Handler>, } +/// This struct represents the state of a condition handler. It contains a key +/// into TLS which holds the currently installed handler, along with the name of +/// the condition (useful for debugging). +/// +/// This struct should never be created directly, but rather only through the +/// `condition!` macro provided to all libraries using libstd. pub struct Condition { + /// Name of the condition handler name: &'static str, + /// TLS key used to insert/remove values in TLS. 
key: local_data::Key<@Handler> } impl Condition { + /// Creates an object which binds the specified handler. This will also save + /// the current handler *on creation* such that when the `Trap` is consumed, + /// it knows which handler to restore. + /// + /// # Example + /// + /// ~~~{.rust} + /// condition! { my_error: int -> int; } + /// + /// let trap = my_error::cond.trap(|error| error + 3); + /// + /// // use `trap`'s inside method to register the handler and then run a + /// // block of code with the handler registered + /// ~~~ pub fn trap<'a>(&'a self, h: &'a fn(T) -> U) -> Trap<'a, T, U> { - unsafe { - let p : *RustClosure = ::cast::transmute(&h); - let prev = local_data::get(self.key, |k| k.map(|&x| *x)); - let h = @Handler { handle: *p, prev: prev }; - Trap { cond: self, handler: h } - } + let h: Closure = unsafe { ::cast::transmute(h) }; + let prev = local_data::get(self.key, |k| k.map(|&x| *x)); + let h = @Handler { handle: h, prev: prev }; + Trap { cond: self, handler: h } } + /// Raises on this condition, invoking any handler if one has been + /// registered, or failing the current task otherwise. + /// + /// While a condition handler is being run, the condition will have no + /// handler listed, so a task failure will occur if the condition is + /// re-raised during the handler. + /// + /// # Arguments + /// + /// * t - The argument to pass along to the condition handler. + /// + /// # Return value + /// + /// If a handler is found, its return value is returned, otherwise this + /// function will not return. pub fn raise(&self, t: T) -> U { let msg = fmt!("Unhandled condition: %s: %?", self.name, t); self.raise_default(t, || fail!(msg.clone())) } + /// Performs the same functionality as `raise`, except that when no handler + /// is found the `default` argument is called instead of failing the task. 
pub fn raise_default(&self, t: T, default: &fn() -> U) -> U { - unsafe { - match local_data::pop(self.key) { - None => { - debug!("Condition.raise: found no handler"); - default() - } - Some(handler) => { - debug!("Condition.raise: found handler"); - match handler.prev { - None => {} - Some(hp) => local_data::set(self.key, hp) - } - let handle : &fn(T) -> U = - ::cast::transmute(handler.handle); - let u = handle(t); - local_data::set(self.key, handler); - u + match local_data::pop(self.key) { + None => { + debug!("Condition.raise: found no handler"); + default() + } + Some(handler) => { + debug!("Condition.raise: found handler"); + match handler.prev { + None => {} + Some(hp) => local_data::set(self.key, hp) } + let handle : &fn(T) -> U = unsafe { + ::cast::transmute(handler.handle) + }; + let u = handle(t); + local_data::set(self.key, handler); + u } } } } +/// A `Trap` is created when the `trap` method is invoked on a `Condition`, and +/// it is used to actually bind a handler into the TLS slot reserved for this +/// condition. +/// +/// Normally this object is not dealt with directly, but rather it's directly +/// used after being returned from `trap` struct Trap<'self, T, U> { - cond: &'self Condition, - handler: @Handler + priv cond: &'self Condition, + priv handler: @Handler } impl<'self, T, U> Trap<'self, T, U> { + /// Execute a block of code with this trap handler's exception handler + /// registered. + /// + /// # Example + /// + /// ~~~{.rust} + /// condition! 
{ my_error: int -> int; } + /// + /// let result = do my_error::cond.trap(|error| error + 3).inside { + /// my_error::cond.raise(4) + /// }; + /// assert_eq!(result, 7); + /// ~~~ pub fn inside(&self, inner: &'self fn() -> V) -> V { let _g = Guard { cond: self.cond }; debug!("Trap: pushing handler to TLS"); @@ -81,8 +189,9 @@ impl<'self, T, U> Trap<'self, T, U> { } } +#[doc(hidden)] struct Guard<'self, T, U> { - cond: &'self Condition + priv cond: &'self Condition } #[unsafe_destructor] diff --git a/src/libstd/fmt/mod.rs b/src/libstd/fmt/mod.rs index cad9f14bda734..99a5ed4d69812 100644 --- a/src/libstd/fmt/mod.rs +++ b/src/libstd/fmt/mod.rs @@ -10,17 +10,18 @@ /*! -# The Formatting Module +The Formatting Module -This module contains the runtime support for the `format!` syntax extension. This -macro is implemented in the compiler to emit calls to this module in order to -format arguments at runtime into strings and streams. +This module contains the runtime support for the `format!` syntax extension. +This macro is implemented in the compiler to emit calls to this module in order +to format arguments at runtime into strings and streams. The functions contained in this module should not normally be used in everyday -use cases of `format!`. The assumptions made by these functions are unsafe for all -inputs, and the compiler performs a large amount of validation on the arguments -to `format!` in order to ensure safety at runtime. While it is possible to call -these functions directly, it is not recommended to do so in the general case. +use cases of `format!`. The assumptions made by these functions are unsafe for +all inputs, and the compiler performs a large amount of validation on the +arguments to `format!` in order to ensure safety at runtime. While it is +possible to call these functions directly, it is not recommended to do so in the +general case. 
## Usage diff --git a/src/libstd/iter.rs b/src/libstd/iter.rs index 07d2aeac627b8..87fad9aae70a9 100644 --- a/src/libstd/iter.rs +++ b/src/libstd/iter.rs @@ -8,12 +8,59 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! Composable external iterators +/*! -The `Iterator` trait defines an interface for objects which implement iteration as a state machine. +Composable external iterators -Algorithms like `zip` are provided as `Iterator` implementations which wrap other objects -implementing the `Iterator` trait. +# The `Iterator` trait + +This module defines Rust's core iteration trait. The `Iterator` trait has one +un-implemented method, `next`. All other methods are derived through default +methods to perform operations such as `zip`, `chain`, `enumerate`, and `fold`. + +The goal of this module is to unify iteration across all containers in Rust. +An iterator can be considered as a state machine which is used to track which +element will be yielded next. + +There are various extensions also defined in this module to assist with various +types of iteration, such as the `DoubleEndedIterator` for iterating in reverse, +the `FromIterator` trait for creating a container from an iterator, and much +more. + +## Rust's `for` loop + +The special syntax used by rust's `for` loop is based around the `Iterator` +trait defined in this module. For loops can be viewed as a syntactical expansion +into a `loop`, for example, the `for` loop in this example is essentially +translated to the `loop` below. + +~~~{.rust} +let values = ~[1, 2, 3]; + +// "Syntactical sugar" taking advantage of an iterator +for &x in values.iter() { + println!("{}", x); +} + +// Rough translation of the iteration without a `for` iterator. +let mut it = values.iter(); +loop { + match it.next() { + Some(&x) => { + println!("{}", x); + } + None => { break } + } +} +~~~ + +This `for` loop syntax can be applied to any iterator over any type. 
+ +## Iteration protocol and more + +More detailed information about iterators can be found in the [container +tutorial](http://static.rust-lang.org/doc/master/tutorial-container.html) with +the rest of the rust manuals. */ diff --git a/src/libstd/str.rs b/src/libstd/str.rs index 93cac8797bb75..a7391b76de16c 100644 --- a/src/libstd/str.rs +++ b/src/libstd/str.rs @@ -8,12 +8,86 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -//! String manipulation -//! -//! Strings are a packed UTF-8 representation of text, stored as -//! buffers of u8 bytes. The buffer is not null terminated. -//! Strings should be indexed in bytes, for efficiency, but UTF-8 unsafe -//! operations should be avoided. +/*! + +String manipulation + +# Basic Usage + +Rust's string type is one of the core primitive types of the language. While +represented by the name `str`, the name `str` is not actually a valid type in +Rust. Each string must also be decorated with its ownership. This means that +there are three common kinds of strings in rust: + +* `~str` - This is an owned string. This type obeys all of the normal semantics + of the `~T` types, meaning that it has one, and only one, owner. This + type cannot be implicitly copied, and is moved out of when passed to + other functions. + +* `@str` - This is a managed string. Similarly to `@T`, this type can be + implicitly copied, and each implicit copy will increment the + reference count to the string. This means that there is no "true + owner" of the string, and the string will be deallocated when the + reference count reaches 0. + +* `&str` - Finally, this is the borrowed string type. This type of string can + only be created from one of the other two kinds of strings. As the + name "borrowed" implies, this type of string is owned elsewhere, and + this string cannot be moved out of. + +As an example, here's a few different kinds of strings. 
+ +~~~{.rust} +let owned_string = ~"I am an owned string"; +let managed_string = @"This string is garbage-collected"; +let borrowed_string1 = "This string is borrowed with the 'static lifetime"; +let borrowed_string2: &str = owned_string; // owned strings can be borrowed +let borrowed_string3: &str = managed_string; // managed strings can also be borrowed +~~~ + +From the example above, you can see that rust has 3 different kinds of string +literals. The owned/managed literals correspond to the owned/managed string +types, but the "borrowed literal" is actually more akin to C's concept of a +static string. + +When a string is declared without a `~` or `@` sigil, then the string is +allocated statically in the rodata of the executable/library. The string then +has the type `&'static str` meaning that the string is valid for the `'static` +lifetime, otherwise known as the lifetime of the entire program. As can be +inferred from the type, these static strings are not mutable. + +# Mutability + +Many languages have immutable strings by default, and rust has a particular +flavor on this idea. As with the rest of Rust types, strings are immutable by +default. If a string is declared as `mut`, however, it may be mutated. This +works the same way as the rest of Rust's type system in the sense that if +there's a mutable reference to a string, there may only be one mutable reference +to that string. With these guarantees, strings can easily transition between +being mutable/immutable with the same benefits of having mutable strings in +other languages. + +~~~{.rust} +let mut buf = ~"testing"; +buf.push_char(' '); +buf.push_str("123"); +assert_eq!(buf, ~"testing 123"); +~~~ + +# Representation + +Rust's string type, `str`, is a sequence of unicode codepoints encoded as a +stream of UTF-8 bytes. All safely-created strings are guaranteed to be validly +encoded UTF-8 sequences. 
Additionally, strings are not guaranteed to be +null-terminated (the null byte is a valid unicode codepoint). + +The actual representation of strings has direct mappings to vectors: + +* `~str` is the same as `~[u8]` +* `&str` is the same as `&[u8]` +* `@str` is the same as `@[u8]` + +*/ use at_vec; use cast; diff --git a/src/libstd/vec.rs b/src/libstd/vec.rs index 9fc0eaf72b14d..3cdee0eb19ad9 100644 --- a/src/libstd/vec.rs +++ b/src/libstd/vec.rs @@ -10,6 +10,8 @@ /*! +Vector manipulation + The `vec` module contains useful code to help work with vector values. Vectors are Rust's list type. Vectors contain zero or more values of homogeneous types: