2022-02-11 20:25:34 +01:00
// SPDX-License-Identifier: GPL-2.0
//! String representations.
2024-03-27 22:36:03 -03:00
use crate ::alloc ::{ flags ::* , vec_ext ::VecExt , AllocError } ;
2022-11-10 17:41:33 +01:00
use alloc ::vec ::Vec ;
2022-11-10 17:41:29 +01:00
use core ::fmt ::{ self , Write } ;
2024-02-23 17:37:09 +01:00
use core ::ops ::{ self , Deref , DerefMut , Index } ;
2022-02-11 20:25:34 +01:00
2024-04-01 23:23:02 +02:00
use crate ::error ::{ code ::* , Error } ;
2022-11-10 17:41:28 +01:00
2022-11-10 17:41:26 +01:00
/// Byte string without UTF-8 validity guarantee.
2024-02-08 18:55:36 +09:00
#[ repr(transparent) ]
pub struct BStr ( [ u8 ] ) ;
impl BStr {
/// Returns the length of this string.
#[ inline ]
pub const fn len ( & self ) -> usize {
self . 0. len ( )
}
/// Returns `true` if the string is empty.
#[ inline ]
pub const fn is_empty ( & self ) -> bool {
self . len ( ) = = 0
}
/// Creates a [`BStr`] from a `[u8]`.
#[ inline ]
pub const fn from_bytes ( bytes : & [ u8 ] ) -> & Self {
// SAFETY: `BStr` is transparent to `[u8]`.
unsafe { & * ( bytes as * const [ u8 ] as * const BStr ) }
}
}
impl fmt ::Display for BStr {
/// Formats printable ASCII characters, escaping the rest.
///
/// ```
/// # use kernel::{fmt, b_str, str::{BStr, CString}};
/// let ascii = b_str!("Hello, BStr!");
/// let s = CString::try_from_fmt(fmt!("{}", ascii)).unwrap();
/// assert_eq!(s.as_bytes(), "Hello, BStr!".as_bytes());
///
/// let non_ascii = b_str!("🦀");
/// let s = CString::try_from_fmt(fmt!("{}", non_ascii)).unwrap();
/// assert_eq!(s.as_bytes(), "\\xf0\\x9f\\xa6\\x80".as_bytes());
/// ```
fn fmt ( & self , f : & mut fmt ::Formatter < '_ > ) -> fmt ::Result {
for & b in & self . 0 {
match b {
// Common escape codes.
b '\t' = > f . write_str ( " \\ t " ) ? ,
b '\n' = > f . write_str ( " \\ n " ) ? ,
b '\r' = > f . write_str ( " \\ r " ) ? ,
// Printable characters.
0x20 ..= 0x7e = > f . write_char ( b as char ) ? ,
_ = > write! ( f , " \\ x{:02x} " , b ) ? ,
}
}
Ok ( ( ) )
}
}
impl fmt ::Debug for BStr {
/// Formats printable ASCII characters with a double quote on either end,
/// escaping the rest.
///
/// ```
/// # use kernel::{fmt, b_str, str::{BStr, CString}};
/// // Embedded double quotes are escaped.
/// let ascii = b_str!("Hello, \"BStr\"!");
/// let s = CString::try_from_fmt(fmt!("{:?}", ascii)).unwrap();
/// assert_eq!(s.as_bytes(), "\"Hello, \\\"BStr\\\"!\"".as_bytes());
///
/// let non_ascii = b_str!("😺");
/// let s = CString::try_from_fmt(fmt!("{:?}", non_ascii)).unwrap();
/// assert_eq!(s.as_bytes(), "\"\\xf0\\x9f\\x98\\xba\"".as_bytes());
/// ```
fn fmt ( & self , f : & mut fmt ::Formatter < '_ > ) -> fmt ::Result {
f . write_char ( '"' ) ? ;
for & b in & self . 0 {
match b {
// Common escape codes.
b '\t' = > f . write_str ( " \\ t " ) ? ,
b '\n' = > f . write_str ( " \\ n " ) ? ,
b '\r' = > f . write_str ( " \\ r " ) ? ,
// String escape characters.
b '\"' = > f . write_str ( " \\ \" " ) ? ,
b '\\' = > f . write_str ( " \\ \\ " ) ? ,
// Printable characters.
0x20 ..= 0x7e = > f . write_char ( b as char ) ? ,
_ = > write! ( f , " \\ x{:02x} " , b ) ? ,
}
}
f . write_char ( '"' )
}
}
impl Deref for BStr {
type Target = [ u8 ] ;
#[ inline ]
fn deref ( & self ) -> & Self ::Target {
& self . 0
}
}
2022-11-10 17:41:26 +01:00
2022-11-10 17:41:27 +01:00
/// Creates a new [`BStr`] from a string literal.
///
/// `b_str!` converts the supplied string literal to byte string, so non-ASCII
/// characters can be included.
///
/// # Examples
///
/// ```
/// # use kernel::b_str;
/// # use kernel::str::BStr;
/// const MY_BSTR: &BStr = b_str!("My awesome BStr!");
/// ```
#[ macro_export ]
macro_rules ! b_str {
( $str :literal ) = > { {
const S : & 'static str = $str ;
2024-02-08 18:55:36 +09:00
const C : & 'static $crate ::str ::BStr = $crate ::str ::BStr ::from_bytes ( S . as_bytes ( ) ) ;
2022-11-10 17:41:27 +01:00
C
} } ;
}
2022-11-10 17:41:28 +01:00
/// Possible errors when using conversion functions in [`CStr`].
#[ derive(Debug, Clone, Copy) ]
pub enum CStrConvertError {
/// Supplied bytes contain an interior `NUL`.
InteriorNul ,
/// Supplied bytes are not terminated by `NUL`.
NotNulTerminated ,
}
impl From < CStrConvertError > for Error {
#[ inline ]
fn from ( _ : CStrConvertError ) -> Error {
EINVAL
}
}
/// A string that is guaranteed to have exactly one `NUL` byte, which is at the
/// end.
///
/// Used for interoperability with kernel APIs that take C strings.
#[ repr(transparent) ]
pub struct CStr ( [ u8 ] ) ;
impl CStr {
/// Returns the length of this string excluding `NUL`.
#[ inline ]
pub const fn len ( & self ) -> usize {
self . len_with_nul ( ) - 1
}
/// Returns the length of this string with `NUL`.
#[ inline ]
pub const fn len_with_nul ( & self ) -> usize {
// SAFETY: This is one of the invariant of `CStr`.
// We add a `unreachable_unchecked` here to hint the optimizer that
// the value returned from this function is non-zero.
if self . 0. is_empty ( ) {
unsafe { core ::hint ::unreachable_unchecked ( ) } ;
}
self . 0. len ( )
}
/// Returns `true` if the string only includes `NUL`.
#[ inline ]
pub const fn is_empty ( & self ) -> bool {
self . len ( ) = = 0
}
/// Wraps a raw C string pointer.
///
/// # Safety
///
/// `ptr` must be a valid pointer to a `NUL`-terminated C string, and it must
/// last at least `'a`. When `CStr` is alive, the memory pointed by `ptr`
/// must not be mutated.
#[ inline ]
pub unsafe fn from_char_ptr < ' a > ( ptr : * const core ::ffi ::c_char ) -> & ' a Self {
// SAFETY: The safety precondition guarantees `ptr` is a valid pointer
// to a `NUL`-terminated C string.
let len = unsafe { bindings ::strlen ( ptr ) } + 1 ;
// SAFETY: Lifetime guaranteed by the safety precondition.
let bytes = unsafe { core ::slice ::from_raw_parts ( ptr as _ , len as _ ) } ;
// SAFETY: As `len` is returned by `strlen`, `bytes` does not contain interior `NUL`.
// As we have added 1 to `len`, the last byte is known to be `NUL`.
unsafe { Self ::from_bytes_with_nul_unchecked ( bytes ) }
}
/// Creates a [`CStr`] from a `[u8]`.
///
/// The provided slice must be `NUL`-terminated, does not contain any
/// interior `NUL` bytes.
pub const fn from_bytes_with_nul ( bytes : & [ u8 ] ) -> Result < & Self , CStrConvertError > {
if bytes . is_empty ( ) {
return Err ( CStrConvertError ::NotNulTerminated ) ;
}
if bytes [ bytes . len ( ) - 1 ] ! = 0 {
return Err ( CStrConvertError ::NotNulTerminated ) ;
}
let mut i = 0 ;
// `i + 1 < bytes.len()` allows LLVM to optimize away bounds checking,
// while it couldn't optimize away bounds checks for `i < bytes.len() - 1`.
while i + 1 < bytes . len ( ) {
if bytes [ i ] = = 0 {
return Err ( CStrConvertError ::InteriorNul ) ;
}
i + = 1 ;
}
// SAFETY: We just checked that all properties hold.
Ok ( unsafe { Self ::from_bytes_with_nul_unchecked ( bytes ) } )
}
/// Creates a [`CStr`] from a `[u8]` without performing any additional
/// checks.
///
/// # Safety
///
/// `bytes` *must* end with a `NUL` byte, and should only have a single
/// `NUL` byte (or the string will be truncated).
#[ inline ]
pub const unsafe fn from_bytes_with_nul_unchecked ( bytes : & [ u8 ] ) -> & CStr {
// SAFETY: Properties of `bytes` guaranteed by the safety precondition.
unsafe { core ::mem ::transmute ( bytes ) }
}
2024-02-23 17:37:09 +01:00
/// Creates a mutable [`CStr`] from a `[u8]` without performing any
/// additional checks.
///
/// # Safety
///
/// `bytes` *must* end with a `NUL` byte, and should only have a single
/// `NUL` byte (or the string will be truncated).
#[ inline ]
pub unsafe fn from_bytes_with_nul_unchecked_mut ( bytes : & mut [ u8 ] ) -> & mut CStr {
// SAFETY: Properties of `bytes` guaranteed by the safety precondition.
unsafe { & mut * ( bytes as * mut [ u8 ] as * mut CStr ) }
}
2022-11-10 17:41:28 +01:00
/// Returns a C pointer to the string.
#[ inline ]
pub const fn as_char_ptr ( & self ) -> * const core ::ffi ::c_char {
self . 0. as_ptr ( ) as _
}
2024-01-31 21:23:27 +01:00
/// Convert the string to a byte slice without the trailing `NUL` byte.
2022-11-10 17:41:28 +01:00
#[ inline ]
pub fn as_bytes ( & self ) -> & [ u8 ] {
& self . 0 [ .. self . len ( ) ]
}
2024-01-31 21:23:27 +01:00
/// Convert the string to a byte slice containing the trailing `NUL` byte.
2022-11-10 17:41:28 +01:00
#[ inline ]
pub const fn as_bytes_with_nul ( & self ) -> & [ u8 ] {
& self . 0
}
/// Yields a [`&str`] slice if the [`CStr`] contains valid UTF-8.
///
/// If the contents of the [`CStr`] are valid UTF-8 data, this
/// function will return the corresponding [`&str`] slice. Otherwise,
/// it will return an error with details of where UTF-8 validation failed.
///
/// # Examples
///
/// ```
/// # use kernel::str::CStr;
/// let cstr = CStr::from_bytes_with_nul(b"foo\0").unwrap();
/// assert_eq!(cstr.to_str(), Ok("foo"));
/// ```
#[ inline ]
pub fn to_str ( & self ) -> Result < & str , core ::str ::Utf8Error > {
core ::str ::from_utf8 ( self . as_bytes ( ) )
}
/// Unsafely convert this [`CStr`] into a [`&str`], without checking for
/// valid UTF-8.
///
/// # Safety
///
/// The contents must be valid UTF-8.
///
/// # Examples
///
/// ```
/// # use kernel::c_str;
/// # use kernel::str::CStr;
2024-01-31 21:23:28 +01:00
/// let bar = c_str!("ツ");
2022-11-10 17:41:28 +01:00
/// // SAFETY: String literals are guaranteed to be valid UTF-8
/// // by the Rust compiler.
/// assert_eq!(unsafe { bar.as_str_unchecked() }, "ツ");
/// ```
#[ inline ]
pub unsafe fn as_str_unchecked ( & self ) -> & str {
unsafe { core ::str ::from_utf8_unchecked ( self . as_bytes ( ) ) }
}
2023-05-03 14:10:16 +00:00
/// Convert this [`CStr`] into a [`CString`] by allocating memory and
/// copying over the string data.
pub fn to_cstring ( & self ) -> Result < CString , AllocError > {
CString ::try_from ( self )
}
2024-02-23 17:37:09 +01:00
/// Converts this [`CStr`] to its ASCII lower case equivalent in-place.
///
/// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
/// but non-ASCII letters are unchanged.
///
/// To return a new lowercased value without modifying the existing one, use
/// [`to_ascii_lowercase()`].
///
/// [`to_ascii_lowercase()`]: #method.to_ascii_lowercase
pub fn make_ascii_lowercase ( & mut self ) {
// INVARIANT: This doesn't introduce or remove NUL bytes in the C
// string.
self . 0. make_ascii_lowercase ( ) ;
}
/// Converts this [`CStr`] to its ASCII upper case equivalent in-place.
///
/// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
/// but non-ASCII letters are unchanged.
///
/// To return a new uppercased value without modifying the existing one, use
/// [`to_ascii_uppercase()`].
///
/// [`to_ascii_uppercase()`]: #method.to_ascii_uppercase
pub fn make_ascii_uppercase ( & mut self ) {
// INVARIANT: This doesn't introduce or remove NUL bytes in the C
// string.
self . 0. make_ascii_uppercase ( ) ;
}
/// Returns a copy of this [`CString`] where each character is mapped to its
/// ASCII lower case equivalent.
///
/// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
/// but non-ASCII letters are unchanged.
///
/// To lowercase the value in-place, use [`make_ascii_lowercase`].
///
/// [`make_ascii_lowercase`]: str::make_ascii_lowercase
pub fn to_ascii_lowercase ( & self ) -> Result < CString , AllocError > {
let mut s = self . to_cstring ( ) ? ;
s . make_ascii_lowercase ( ) ;
Ok ( s )
}
/// Returns a copy of this [`CString`] where each character is mapped to its
/// ASCII upper case equivalent.
///
/// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
/// but non-ASCII letters are unchanged.
///
/// To uppercase the value in-place, use [`make_ascii_uppercase`].
///
/// [`make_ascii_uppercase`]: str::make_ascii_uppercase
pub fn to_ascii_uppercase ( & self ) -> Result < CString , AllocError > {
let mut s = self . to_cstring ( ) ? ;
s . make_ascii_uppercase ( ) ;
Ok ( s )
}
2022-11-10 17:41:28 +01:00
}
2022-11-10 17:41:29 +01:00
impl fmt ::Display for CStr {
/// Formats printable ASCII characters, escaping the rest.
///
/// ```
/// # use kernel::c_str;
2023-07-18 07:27:48 +02:00
/// # use kernel::fmt;
2022-11-10 17:41:29 +01:00
/// # use kernel::str::CStr;
/// # use kernel::str::CString;
/// let penguin = c_str!("🐧");
/// let s = CString::try_from_fmt(fmt!("{}", penguin)).unwrap();
/// assert_eq!(s.as_bytes_with_nul(), "\\xf0\\x9f\\x90\\xa7\0".as_bytes());
///
/// let ascii = c_str!("so \"cool\"");
/// let s = CString::try_from_fmt(fmt!("{}", ascii)).unwrap();
/// assert_eq!(s.as_bytes_with_nul(), "so \"cool\"\0".as_bytes());
/// ```
fn fmt ( & self , f : & mut fmt ::Formatter < '_ > ) -> fmt ::Result {
for & c in self . as_bytes ( ) {
if ( 0x20 .. 0x7f ) . contains ( & c ) {
// Printable character.
f . write_char ( c as char ) ? ;
} else {
write! ( f , " \\ x{:02x} " , c ) ? ;
}
}
Ok ( ( ) )
}
}
impl fmt ::Debug for CStr {
/// Formats printable ASCII characters with a double quote on either end, escaping the rest.
///
/// ```
/// # use kernel::c_str;
2023-07-18 07:27:48 +02:00
/// # use kernel::fmt;
2022-11-10 17:41:29 +01:00
/// # use kernel::str::CStr;
/// # use kernel::str::CString;
/// let penguin = c_str!("🐧");
/// let s = CString::try_from_fmt(fmt!("{:?}", penguin)).unwrap();
/// assert_eq!(s.as_bytes_with_nul(), "\"\\xf0\\x9f\\x90\\xa7\"\0".as_bytes());
///
/// // Embedded double quotes are escaped.
/// let ascii = c_str!("so \"cool\"");
/// let s = CString::try_from_fmt(fmt!("{:?}", ascii)).unwrap();
/// assert_eq!(s.as_bytes_with_nul(), "\"so \\\"cool\\\"\"\0".as_bytes());
/// ```
fn fmt ( & self , f : & mut fmt ::Formatter < '_ > ) -> fmt ::Result {
f . write_str ( " \" " ) ? ;
for & c in self . as_bytes ( ) {
match c {
// Printable characters.
b '\"' = > f . write_str ( " \\ \" " ) ? ,
0x20 ..= 0x7e = > f . write_char ( c as char ) ? ,
_ = > write! ( f , " \\ x{:02x} " , c ) ? ,
}
}
f . write_str ( " \" " )
}
}
impl AsRef < BStr > for CStr {
#[ inline ]
fn as_ref ( & self ) -> & BStr {
2024-02-08 18:55:36 +09:00
BStr ::from_bytes ( self . as_bytes ( ) )
2022-11-10 17:41:29 +01:00
}
}
impl Deref for CStr {
type Target = BStr ;
#[ inline ]
fn deref ( & self ) -> & Self ::Target {
2024-02-08 18:55:36 +09:00
self . as_ref ( )
2022-11-10 17:41:29 +01:00
}
}
impl Index < ops ::RangeFrom < usize > > for CStr {
type Output = CStr ;
#[ inline ]
fn index ( & self , index : ops ::RangeFrom < usize > ) -> & Self ::Output {
// Delegate bounds checking to slice.
// Assign to _ to mute clippy's unnecessary operation warning.
let _ = & self . as_bytes ( ) [ index . start .. ] ;
// SAFETY: We just checked the bounds.
unsafe { Self ::from_bytes_with_nul_unchecked ( & self . 0 [ index . start .. ] ) }
}
}
impl Index < ops ::RangeFull > for CStr {
type Output = CStr ;
#[ inline ]
fn index ( & self , _index : ops ::RangeFull ) -> & Self ::Output {
self
}
}
mod private {
use core ::ops ;
// Marker trait for index types that can be forward to `BStr`.
pub trait CStrIndex { }
impl CStrIndex for usize { }
impl CStrIndex for ops ::Range < usize > { }
impl CStrIndex for ops ::RangeInclusive < usize > { }
impl CStrIndex for ops ::RangeToInclusive < usize > { }
}
impl < Idx > Index < Idx > for CStr
where
Idx : private ::CStrIndex ,
BStr : Index < Idx > ,
{
type Output = < BStr as Index < Idx > > ::Output ;
#[ inline ]
fn index ( & self , index : Idx ) -> & Self ::Output {
2024-02-08 18:55:36 +09:00
& self . as_ref ( ) [ index ]
2022-11-10 17:41:29 +01:00
}
}
2022-11-10 17:41:31 +01:00
/// Creates a new [`CStr`] from a string literal.
///
/// The string literal should not contain any `NUL` bytes.
///
/// # Examples
///
/// ```
/// # use kernel::c_str;
/// # use kernel::str::CStr;
/// const MY_CSTR: &CStr = c_str!("My awesome CStr!");
/// ```
#[ macro_export ]
macro_rules ! c_str {
( $str :expr ) = > { {
const S : & str = concat! ( $str , " \0 " ) ;
const C : & $crate ::str ::CStr = match $crate ::str ::CStr ::from_bytes_with_nul ( S . as_bytes ( ) ) {
Ok ( v ) = > v ,
Err ( _ ) = > panic! ( " string contains interior NUL " ) ,
} ;
C
} } ;
}
2022-11-10 17:41:30 +01:00
#[ cfg(test) ]
mod tests {
use super ::* ;
2024-02-08 18:55:36 +09:00
use alloc ::format ;
const ALL_ASCII_CHARS : & 'static str =
" \\ x01 \\ x02 \\ x03 \\ x04 \\ x05 \\ x06 \\ x07 \\ x08 \\ x09 \\ x0a \\ x0b \\ x0c \\ x0d \\ x0e \\ x0f \
\ \ x10 \ \ x11 \ \ x12 \ \ x13 \ \ x14 \ \ x15 \ \ x16 \ \ x17 \ \ x18 \ \ x19 \ \ x1a \ \ x1b \ \ x1c \ \ x1d \ \ x1e \ \ x1f \
! \ " #$%&'()*+,-./0123456789:;<=>?@ \
ABCDEFGHIJKLMNOPQRSTUVWXYZ [ \ \ ] ^ _ ` abcdefghijklmnopqrstuvwxyz { | } ~ \ \ x7f \
\ \ x80 \ \ x81 \ \ x82 \ \ x83 \ \ x84 \ \ x85 \ \ x86 \ \ x87 \ \ x88 \ \ x89 \ \ x8a \ \ x8b \ \ x8c \ \ x8d \ \ x8e \ \ x8f \
\ \ x90 \ \ x91 \ \ x92 \ \ x93 \ \ x94 \ \ x95 \ \ x96 \ \ x97 \ \ x98 \ \ x99 \ \ x9a \ \ x9b \ \ x9c \ \ x9d \ \ x9e \ \ x9f \
\ \ xa0 \ \ xa1 \ \ xa2 \ \ xa3 \ \ xa4 \ \ xa5 \ \ xa6 \ \ xa7 \ \ xa8 \ \ xa9 \ \ xaa \ \ xab \ \ xac \ \ xad \ \ xae \ \ xaf \
\ \ xb0 \ \ xb1 \ \ xb2 \ \ xb3 \ \ xb4 \ \ xb5 \ \ xb6 \ \ xb7 \ \ xb8 \ \ xb9 \ \ xba \ \ xbb \ \ xbc \ \ xbd \ \ xbe \ \ xbf \
\ \ xc0 \ \ xc1 \ \ xc2 \ \ xc3 \ \ xc4 \ \ xc5 \ \ xc6 \ \ xc7 \ \ xc8 \ \ xc9 \ \ xca \ \ xcb \ \ xcc \ \ xcd \ \ xce \ \ xcf \
\ \ xd0 \ \ xd1 \ \ xd2 \ \ xd3 \ \ xd4 \ \ xd5 \ \ xd6 \ \ xd7 \ \ xd8 \ \ xd9 \ \ xda \ \ xdb \ \ xdc \ \ xdd \ \ xde \ \ xdf \
\ \ xe0 \ \ xe1 \ \ xe2 \ \ xe3 \ \ xe4 \ \ xe5 \ \ xe6 \ \ xe7 \ \ xe8 \ \ xe9 \ \ xea \ \ xeb \ \ xec \ \ xed \ \ xee \ \ xef \
\ \ xf0 \ \ xf1 \ \ xf2 \ \ xf3 \ \ xf4 \ \ xf5 \ \ xf6 \ \ xf7 \ \ xf8 \ \ xf9 \ \ xfa \ \ xfb \ \ xfc \ \ xfd \ \ xfe \ \ xff " ;
2022-11-10 17:41:30 +01:00
#[ test ]
fn test_cstr_to_str ( ) {
let good_bytes = b " \xf0 \x9f \xa6 \x80 \0 " ;
let checked_cstr = CStr ::from_bytes_with_nul ( good_bytes ) . unwrap ( ) ;
let checked_str = checked_cstr . to_str ( ) . unwrap ( ) ;
assert_eq! ( checked_str , " 🦀 " ) ;
}
#[ test ]
#[ should_panic ]
fn test_cstr_to_str_panic ( ) {
let bad_bytes = b " \xc3 \x28 \0 " ;
let checked_cstr = CStr ::from_bytes_with_nul ( bad_bytes ) . unwrap ( ) ;
checked_cstr . to_str ( ) . unwrap ( ) ;
}
#[ test ]
fn test_cstr_as_str_unchecked ( ) {
let good_bytes = b " \xf0 \x9f \x90 \xA7 \0 " ;
let checked_cstr = CStr ::from_bytes_with_nul ( good_bytes ) . unwrap ( ) ;
let unchecked_str = unsafe { checked_cstr . as_str_unchecked ( ) } ;
assert_eq! ( unchecked_str , " 🐧 " ) ;
}
2024-02-08 18:55:36 +09:00
#[ test ]
fn test_cstr_display ( ) {
let hello_world = CStr ::from_bytes_with_nul ( b " hello, world! \0 " ) . unwrap ( ) ;
assert_eq! ( format! ( " {} " , hello_world ) , " hello, world! " ) ;
let non_printables = CStr ::from_bytes_with_nul ( b " \x01 \x09 \x0a \0 " ) . unwrap ( ) ;
assert_eq! ( format! ( " {} " , non_printables ) , " \\ x01 \\ x09 \\ x0a " ) ;
let non_ascii = CStr ::from_bytes_with_nul ( b " d \xe9 j \xe0 vu \0 " ) . unwrap ( ) ;
assert_eq! ( format! ( " {} " , non_ascii ) , " d \\ xe9j \\ xe0 vu " ) ;
let good_bytes = CStr ::from_bytes_with_nul ( b " \xf0 \x9f \xa6 \x80 \0 " ) . unwrap ( ) ;
assert_eq! ( format! ( " {} " , good_bytes ) , " \\ xf0 \\ x9f \\ xa6 \\ x80 " ) ;
}
#[ test ]
fn test_cstr_display_all_bytes ( ) {
let mut bytes : [ u8 ; 256 ] = [ 0 ; 256 ] ;
// fill `bytes` with [1..=255] + [0]
for i in u8 ::MIN ..= u8 ::MAX {
bytes [ i as usize ] = i . wrapping_add ( 1 ) ;
}
let cstr = CStr ::from_bytes_with_nul ( & bytes ) . unwrap ( ) ;
assert_eq! ( format! ( " {} " , cstr ) , ALL_ASCII_CHARS ) ;
}
#[ test ]
fn test_cstr_debug ( ) {
let hello_world = CStr ::from_bytes_with_nul ( b " hello, world! \0 " ) . unwrap ( ) ;
assert_eq! ( format! ( " {:?} " , hello_world ) , " \" hello, world! \" " ) ;
let non_printables = CStr ::from_bytes_with_nul ( b " \x01 \x09 \x0a \0 " ) . unwrap ( ) ;
assert_eq! ( format! ( " {:?} " , non_printables ) , " \" \\ x01 \\ x09 \\ x0a \" " ) ;
let non_ascii = CStr ::from_bytes_with_nul ( b " d \xe9 j \xe0 vu \0 " ) . unwrap ( ) ;
assert_eq! ( format! ( " {:?} " , non_ascii ) , " \" d \\ xe9j \\ xe0 vu \" " ) ;
let good_bytes = CStr ::from_bytes_with_nul ( b " \xf0 \x9f \xa6 \x80 \0 " ) . unwrap ( ) ;
assert_eq! ( format! ( " {:?} " , good_bytes ) , " \" \\ xf0 \\ x9f \\ xa6 \\ x80 \" " ) ;
}
#[ test ]
fn test_bstr_display ( ) {
let hello_world = BStr ::from_bytes ( b " hello, world! " ) ;
assert_eq! ( format! ( " {} " , hello_world ) , " hello, world! " ) ;
let escapes = BStr ::from_bytes ( b " _ \t _ \n _ \r _ \\ _ \' _ \" _ " ) ;
assert_eq! ( format! ( " {} " , escapes ) , " _ \\ t_ \\ n_ \\ r_ \\ _'_ \" _ " ) ;
let others = BStr ::from_bytes ( b " \x01 " ) ;
assert_eq! ( format! ( " {} " , others ) , " \\ x01 " ) ;
let non_ascii = BStr ::from_bytes ( b " d \xe9 j \xe0 vu " ) ;
assert_eq! ( format! ( " {} " , non_ascii ) , " d \\ xe9j \\ xe0 vu " ) ;
let good_bytes = BStr ::from_bytes ( b " \xf0 \x9f \xa6 \x80 " ) ;
assert_eq! ( format! ( " {} " , good_bytes ) , " \\ xf0 \\ x9f \\ xa6 \\ x80 " ) ;
}
#[ test ]
fn test_bstr_debug ( ) {
let hello_world = BStr ::from_bytes ( b " hello, world! " ) ;
assert_eq! ( format! ( " {:?} " , hello_world ) , " \" hello, world! \" " ) ;
let escapes = BStr ::from_bytes ( b " _ \t _ \n _ \r _ \\ _ \' _ \" _ " ) ;
assert_eq! ( format! ( " {:?} " , escapes ) , " \" _ \\ t_ \\ n_ \\ r_ \\ \\ _'_ \\ \" _ \" " ) ;
let others = BStr ::from_bytes ( b " \x01 " ) ;
assert_eq! ( format! ( " {:?} " , others ) , " \" \\ x01 \" " ) ;
let non_ascii = BStr ::from_bytes ( b " d \xe9 j \xe0 vu " ) ;
assert_eq! ( format! ( " {:?} " , non_ascii ) , " \" d \\ xe9j \\ xe0 vu \" " ) ;
let good_bytes = BStr ::from_bytes ( b " \xf0 \x9f \xa6 \x80 " ) ;
assert_eq! ( format! ( " {:?} " , good_bytes ) , " \" \\ xf0 \\ x9f \\ xa6 \\ x80 \" " ) ;
}
2022-11-10 17:41:30 +01:00
}
2022-02-11 20:25:34 +01:00
/// Allows formatting of [`fmt::Arguments`] into a raw buffer.
///
/// It does not fail if callers write past the end of the buffer so that they can calculate the
/// size required to fit everything.
///
/// # Invariants
///
/// The memory region between `pos` (inclusive) and `end` (exclusive) is valid for writes if `pos`
/// is less than `end`.
pub ( crate ) struct RawFormatter {
// Use `usize` to use `saturating_*` functions.
beg : usize ,
pos : usize ,
end : usize ,
}
impl RawFormatter {
2022-11-10 17:41:33 +01:00
/// Creates a new instance of [`RawFormatter`] with an empty buffer.
fn new ( ) -> Self {
// INVARIANT: The buffer is empty, so the region that needs to be writable is empty.
Self {
beg : 0 ,
pos : 0 ,
end : 0 ,
}
}
2022-02-11 20:25:34 +01:00
/// Creates a new instance of [`RawFormatter`] with the given buffer pointers.
///
/// # Safety
///
/// If `pos` is less than `end`, then the region between `pos` (inclusive) and `end`
/// (exclusive) must be valid for writes for the lifetime of the returned [`RawFormatter`].
pub ( crate ) unsafe fn from_ptrs ( pos : * mut u8 , end : * mut u8 ) -> Self {
2023-03-03 20:06:29 +01:00
// INVARIANT: The safety requirements guarantee the type invariants.
2022-02-11 20:25:34 +01:00
Self {
beg : pos as _ ,
pos : pos as _ ,
end : end as _ ,
}
}
2022-11-10 17:41:32 +01:00
/// Creates a new instance of [`RawFormatter`] with the given buffer.
///
/// # Safety
///
/// The memory region starting at `buf` and extending for `len` bytes must be valid for writes
/// for the lifetime of the returned [`RawFormatter`].
pub ( crate ) unsafe fn from_buffer ( buf : * mut u8 , len : usize ) -> Self {
let pos = buf as usize ;
// INVARIANT: We ensure that `end` is never less then `buf`, and the safety requirements
// guarantees that the memory region is valid for writes.
Self {
pos ,
beg : pos ,
end : pos . saturating_add ( len ) ,
}
}
2022-02-11 20:25:34 +01:00
/// Returns the current insert position.
///
/// N.B. It may point to invalid memory.
pub ( crate ) fn pos ( & self ) -> * mut u8 {
self . pos as _
}
2022-11-10 17:41:33 +01:00
2024-01-31 21:23:23 +01:00
/// Returns the number of bytes written to the formatter.
2022-11-10 17:41:33 +01:00
pub ( crate ) fn bytes_written ( & self ) -> usize {
self . pos - self . beg
}
2022-02-11 20:25:34 +01:00
}
impl fmt ::Write for RawFormatter {
fn write_str ( & mut self , s : & str ) -> fmt ::Result {
// `pos` value after writing `len` bytes. This does not have to be bounded by `end`, but we
// don't want it to wrap around to 0.
let pos_new = self . pos . saturating_add ( s . len ( ) ) ;
// Amount that we can copy. `saturating_sub` ensures we get 0 if `pos` goes past `end`.
let len_to_copy = core ::cmp ::min ( pos_new , self . end ) . saturating_sub ( self . pos ) ;
if len_to_copy > 0 {
// SAFETY: If `len_to_copy` is non-zero, then we know `pos` has not gone past `end`
// yet, so it is valid for write per the type invariants.
unsafe {
core ::ptr ::copy_nonoverlapping (
s . as_bytes ( ) . as_ptr ( ) ,
self . pos as * mut u8 ,
len_to_copy ,
)
} ;
}
self . pos = pos_new ;
Ok ( ( ) )
}
}
2022-11-10 17:41:32 +01:00
/// Allows formatting of [`fmt::Arguments`] into a raw buffer.
///
/// Fails if callers attempt to write more than will fit in the buffer.
pub ( crate ) struct Formatter ( RawFormatter ) ;
impl Formatter {
/// Creates a new instance of [`Formatter`] with the given buffer.
///
/// # Safety
///
/// The memory region starting at `buf` and extending for `len` bytes must be valid for writes
/// for the lifetime of the returned [`Formatter`].
pub ( crate ) unsafe fn from_buffer ( buf : * mut u8 , len : usize ) -> Self {
// SAFETY: The safety requirements of this function satisfy those of the callee.
Self ( unsafe { RawFormatter ::from_buffer ( buf , len ) } )
}
}
impl Deref for Formatter {
type Target = RawFormatter ;
fn deref ( & self ) -> & Self ::Target {
& self . 0
}
}
impl fmt ::Write for Formatter {
fn write_str ( & mut self , s : & str ) -> fmt ::Result {
self . 0. write_str ( s ) ? ;
// Fail the request if we go past the end of the buffer.
if self . 0. pos > self . 0. end {
Err ( fmt ::Error )
} else {
Ok ( ( ) )
}
}
}
2022-11-10 17:41:33 +01:00
/// An owned string that is guaranteed to have exactly one `NUL` byte, which is at the end.
///
/// Used for interoperability with kernel APIs that take C strings.
///
/// # Invariants
///
/// The string is always `NUL`-terminated and contains no other `NUL` bytes.
///
/// # Examples
///
/// ```
2023-07-18 07:27:48 +02:00
/// use kernel::{str::CString, fmt};
2022-11-10 17:41:33 +01:00
///
/// let s = CString::try_from_fmt(fmt!("{}{}{}", "abc", 10, 20)).unwrap();
/// assert_eq!(s.as_bytes_with_nul(), "abc1020\0".as_bytes());
///
/// let tmp = "testing";
/// let s = CString::try_from_fmt(fmt!("{tmp}{}", 123)).unwrap();
/// assert_eq!(s.as_bytes_with_nul(), "testing123\0".as_bytes());
///
/// // This fails because it has an embedded `NUL` byte.
/// let s = CString::try_from_fmt(fmt!("a\0b{}", 123));
/// assert_eq!(s.is_ok(), false);
/// ```
pub struct CString {
buf : Vec < u8 > ,
}
impl CString {
/// Creates an instance of [`CString`] from the given formatted arguments.
pub fn try_from_fmt ( args : fmt ::Arguments < '_ > ) -> Result < Self , Error > {
// Calculate the size needed (formatted string plus `NUL` terminator).
let mut f = RawFormatter ::new ( ) ;
f . write_fmt ( args ) ? ;
f . write_str ( " \0 " ) ? ;
let size = f . bytes_written ( ) ;
// Allocate a vector with the required number of bytes, and write to it.
2024-03-27 22:36:00 -03:00
let mut buf = < Vec < _ > as VecExt < _ > > ::with_capacity ( size , GFP_KERNEL ) ? ;
2022-11-10 17:41:33 +01:00
// SAFETY: The buffer stored in `buf` is at least of size `size` and is valid for writes.
let mut f = unsafe { Formatter ::from_buffer ( buf . as_mut_ptr ( ) , size ) } ;
f . write_fmt ( args ) ? ;
f . write_str ( " \0 " ) ? ;
// SAFETY: The number of bytes that can be written to `f` is bounded by `size`, which is
// `buf`'s capacity. The contents of the buffer have been initialised by writes to `f`.
unsafe { buf . set_len ( f . bytes_written ( ) ) } ;
// Check that there are no `NUL` bytes before the end.
// SAFETY: The buffer is valid for read because `f.bytes_written()` is bounded by `size`
// (which the minimum buffer size) and is non-zero (we wrote at least the `NUL` terminator)
// so `f.bytes_written() - 1` doesn't underflow.
let ptr = unsafe { bindings ::memchr ( buf . as_ptr ( ) . cast ( ) , 0 , ( f . bytes_written ( ) - 1 ) as _ ) } ;
if ! ptr . is_null ( ) {
return Err ( EINVAL ) ;
}
// INVARIANT: We wrote the `NUL` terminator and checked above that no other `NUL` bytes
// exist in the buffer.
Ok ( Self { buf } )
}
}
impl Deref for CString {
type Target = CStr ;
fn deref ( & self ) -> & Self ::Target {
// SAFETY: The type invariants guarantee that the string is `NUL`-terminated and that no
// other `NUL` bytes exist.
unsafe { CStr ::from_bytes_with_nul_unchecked ( self . buf . as_slice ( ) ) }
}
}
2022-11-10 17:41:34 +01:00
2024-02-23 17:37:09 +01:00
impl DerefMut for CString {
fn deref_mut ( & mut self ) -> & mut Self ::Target {
// SAFETY: A `CString` is always NUL-terminated and contains no other
// NUL bytes.
unsafe { CStr ::from_bytes_with_nul_unchecked_mut ( self . buf . as_mut_slice ( ) ) }
}
}
2023-05-03 14:10:16 +00:00
impl < ' a > TryFrom < & ' a CStr > for CString {
type Error = AllocError ;
fn try_from ( cstr : & ' a CStr ) -> Result < CString , AllocError > {
let mut buf = Vec ::new ( ) ;
2024-03-27 22:36:00 -03:00
< Vec < _ > as VecExt < _ > > ::extend_from_slice ( & mut buf , cstr . as_bytes_with_nul ( ) , GFP_KERNEL )
2023-05-03 14:10:16 +00:00
. map_err ( | _ | AllocError ) ? ;
// INVARIANT: The `CStr` and `CString` types have the same invariants for
// the string data, and we copied it over without changes.
Ok ( CString { buf } )
}
}
2023-07-14 18:19:33 +09:00
impl fmt ::Debug for CString {
fn fmt ( & self , f : & mut fmt ::Formatter < '_ > ) -> fmt ::Result {
fmt ::Debug ::fmt ( & * * self , f )
}
}
2022-11-10 17:41:34 +01:00
/// A convenience alias for [`core::format_args`].
#[ macro_export ]
macro_rules ! fmt {
( $( $f :tt ) * ) = > ( core ::format_args! ( $( $f ) * ) )
}