2017-12-18 06:00:59 +03:00
// SPDX-License-Identifier: LGPL-2.1
2012-12-10 23:04:46 +04:00
/*
* Copyright ( c ) 2012 Taobao .
* Written by Tao Ma < boyu . mt @ taobao . com >
*/
2015-04-12 07:56:28 +03:00
2017-10-02 00:57:54 +03:00
# include <linux/iomap.h>
2015-04-12 07:56:28 +03:00
# include <linux/fiemap.h>
2022-06-30 12:01:00 +03:00
# include <linux/namei.h>
2018-01-09 16:21:39 +03:00
# include <linux/iversion.h>
mm: introduce memalloc_retry_wait()
Various places in the kernel - largely in filesystems - respond to a
memory allocation failure by looping around and re-trying. Some of
these cannot conveniently use __GFP_NOFAIL, for reasons such as:
- a GFP_ATOMIC allocation, which __GFP_NOFAIL doesn't work on
- a need to check for the process being signalled between failures
- the possibility that other recovery actions could be performed
- the allocation is quite deep in support code, and passing down an
extra flag to say if __GFP_NOFAIL is wanted would be clumsy.
Many of these currently use congestion_wait() which (in almost all
cases) simply waits the given timeout - congestion isn't tracked for
most devices.
It isn't clear what the best delay is for loops, but it is clear that
the various filesystems shouldn't be responsible for choosing a timeout.
This patch introduces memalloc_retry_wait(), which takes on that
responsibility. Code that wants to retry a memory allocation can call
this function, passing the GFP flags that were used. It will wait
for however long is appropriate.
For now, it only considers __GFP_NORETRY and whatever
gfpflags_allow_blocking() tests. If blocking is allowed without
__GFP_NORETRY, then alloc_page either made some reclaim progress, or
waited for a while, before failing. So there is no need for much
further waiting. memalloc_retry_wait() will wait until the current
jiffie ends. If this condition is not met, then alloc_page() won't have
waited much if at all. In that case memalloc_retry_wait() waits about
200ms. This is the delay that most current loops use.
linux/sched/mm.h needs to be included in some files now,
but linux/backing-dev.h does not.
Link: https://lkml.kernel.org/r/163754371968.13692.1277530886009912421@noble.neil.brown.name
Signed-off-by: NeilBrown <neilb@suse.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Jaegeuk Kim <jaegeuk@kernel.org>
Cc: Chao Yu <chao@kernel.org>
Cc: Darrick J. Wong <djwong@kernel.org>
Cc: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2022-01-15 01:07:14 +03:00
# include <linux/sched/mm.h>
2015-04-12 07:56:28 +03:00
2012-12-10 23:04:46 +04:00
# include "ext4_jbd2.h"
# include "ext4.h"
# include "xattr.h"
2012-12-10 23:05:51 +04:00
# include "truncate.h"
2012-12-10 23:04:46 +04:00
# define EXT4_XATTR_SYSTEM_DATA "data"
# define EXT4_MIN_INLINE_DATA_SIZE ((sizeof(__le32) * EXT4_N_BLOCKS))
2013-04-20 01:53:09 +04:00
# define EXT4_INLINE_DOTDOT_OFFSET 2
# define EXT4_INLINE_DOTDOT_SIZE 4
2012-12-10 23:04:46 +04:00
2014-05-12 18:50:23 +04:00
static int ext4_get_inline_size ( struct inode * inode )
2012-12-10 23:04:46 +04:00
{
if ( EXT4_I ( inode ) - > i_inline_off )
return EXT4_I ( inode ) - > i_inline_size ;
return 0 ;
}
static int get_max_inline_xattr_value_size ( struct inode * inode ,
struct ext4_iloc * iloc )
{
struct ext4_xattr_ibody_header * header ;
struct ext4_xattr_entry * entry ;
struct ext4_inode * raw_inode ;
int free , min_offs ;
2022-06-16 05:13:57 +03:00
if ( ! EXT4_INODE_HAS_XATTR_SPACE ( inode ) )
return 0 ;
2012-12-10 23:04:46 +04:00
min_offs = EXT4_SB ( inode - > i_sb ) - > s_inode_size -
EXT4_GOOD_OLD_INODE_SIZE -
EXT4_I ( inode ) - > i_extra_isize -
sizeof ( struct ext4_xattr_ibody_header ) ;
/*
* We need to subtract another sizeof ( __u32 ) since an in - inode xattr
* needs an empty 4 bytes to indicate the gap between the xattr entry
* and the name / value pair .
*/
if ( ! ext4_test_inode_state ( inode , EXT4_STATE_XATTR ) )
return EXT4_XATTR_SIZE ( min_offs -
EXT4_XATTR_LEN ( strlen ( EXT4_XATTR_SYSTEM_DATA ) ) -
EXT4_XATTR_ROUND - sizeof ( __u32 ) ) ;
raw_inode = ext4_raw_inode ( iloc ) ;
header = IHDR ( inode , raw_inode ) ;
entry = IFIRST ( header ) ;
/* Compute min_offs. */
for ( ; ! IS_LAST_ENTRY ( entry ) ; entry = EXT4_XATTR_NEXT ( entry ) ) {
2017-06-22 04:10:32 +03:00
if ( ! entry - > e_value_inum & & entry - > e_value_size ) {
2012-12-10 23:04:46 +04:00
size_t offs = le16_to_cpu ( entry - > e_value_offs ) ;
if ( offs < min_offs )
min_offs = offs ;
}
}
free = min_offs -
( ( void * ) entry - ( void * ) IFIRST ( header ) ) - sizeof ( __u32 ) ;
if ( EXT4_I ( inode ) - > i_inline_off ) {
entry = ( struct ext4_xattr_entry * )
( ( void * ) raw_inode + EXT4_I ( inode ) - > i_inline_off ) ;
2013-07-01 16:12:37 +04:00
free + = EXT4_XATTR_SIZE ( le32_to_cpu ( entry - > e_value_size ) ) ;
2012-12-10 23:04:46 +04:00
goto out ;
}
free - = EXT4_XATTR_LEN ( strlen ( EXT4_XATTR_SYSTEM_DATA ) ) ;
if ( free > EXT4_XATTR_ROUND )
free = EXT4_XATTR_SIZE ( free - EXT4_XATTR_ROUND ) ;
else
free = 0 ;
out :
return free ;
}
/*
* Get the maximum size we now can store in an inode .
* If we can ' t find the space for a xattr entry , don ' t use the space
* of the extents since we have no space to indicate the inline data .
*/
int ext4_get_max_inline_size ( struct inode * inode )
{
int error , max_inline_size ;
struct ext4_iloc iloc ;
if ( EXT4_I ( inode ) - > i_extra_isize = = 0 )
return 0 ;
error = ext4_get_inode_loc ( inode , & iloc ) ;
if ( error ) {
2020-03-29 02:33:43 +03:00
ext4_error_inode_err ( inode , __func__ , __LINE__ , 0 , - error ,
" can't get inode location %lu " ,
inode - > i_ino ) ;
2012-12-10 23:04:46 +04:00
return 0 ;
}
down_read ( & EXT4_I ( inode ) - > xattr_sem ) ;
max_inline_size = get_max_inline_xattr_value_size ( inode , & iloc ) ;
up_read ( & EXT4_I ( inode ) - > xattr_sem ) ;
brelse ( iloc . bh ) ;
if ( ! max_inline_size )
return 0 ;
return max_inline_size + EXT4_MIN_INLINE_DATA_SIZE ;
}
/*
* this function does not take xattr_sem , which is OK because it is
* currently only used in a code path coming form ext4_iget , before
* the new inode has been unlocked
*/
int ext4_find_inline_data_nolock ( struct inode * inode )
{
struct ext4_xattr_ibody_find is = {
. s = { . not_found = - ENODATA , } ,
} ;
struct ext4_xattr_info i = {
. name_index = EXT4_XATTR_INDEX_SYSTEM ,
. name = EXT4_XATTR_SYSTEM_DATA ,
} ;
int error ;
if ( EXT4_I ( inode ) - > i_extra_isize = = 0 )
return 0 ;
error = ext4_get_inode_loc ( inode , & is . iloc ) ;
if ( error )
return error ;
error = ext4_xattr_ibody_find ( inode , & i , & is ) ;
if ( error )
goto out ;
if ( ! is . s . not_found ) {
2018-05-22 23:15:24 +03:00
if ( is . s . here - > e_value_inum ) {
EXT4_ERROR_INODE ( inode , " inline data xattr refers "
" to an external xattr inode " ) ;
error = - EFSCORRUPTED ;
goto out ;
}
2012-12-10 23:04:46 +04:00
EXT4_I ( inode ) - > i_inline_off = ( u16 ) ( ( void * ) is . s . here -
( void * ) ext4_raw_inode ( & is . iloc ) ) ;
EXT4_I ( inode ) - > i_inline_size = EXT4_MIN_INLINE_DATA_SIZE +
le32_to_cpu ( is . s . here - > e_value_size ) ;
}
out :
brelse ( is . iloc . bh ) ;
return error ;
}
static int ext4_read_inline_data ( struct inode * inode , void * buffer ,
unsigned int len ,
struct ext4_iloc * iloc )
{
struct ext4_xattr_entry * entry ;
struct ext4_xattr_ibody_header * header ;
int cp_len = 0 ;
struct ext4_inode * raw_inode ;
if ( ! len )
return 0 ;
BUG_ON ( len > EXT4_I ( inode ) - > i_inline_size ) ;
2022-08-17 05:59:28 +03:00
cp_len = min_t ( unsigned int , len , EXT4_MIN_INLINE_DATA_SIZE ) ;
2012-12-10 23:04:46 +04:00
raw_inode = ext4_raw_inode ( iloc ) ;
memcpy ( buffer , ( void * ) ( raw_inode - > i_block ) , cp_len ) ;
len - = cp_len ;
buffer + = cp_len ;
if ( ! len )
goto out ;
header = IHDR ( inode , raw_inode ) ;
entry = ( struct ext4_xattr_entry * ) ( ( void * ) raw_inode +
EXT4_I ( inode ) - > i_inline_off ) ;
len = min_t ( unsigned int , len ,
( unsigned int ) le32_to_cpu ( entry - > e_value_size ) ) ;
memcpy ( buffer ,
( void * ) IFIRST ( header ) + le16_to_cpu ( entry - > e_value_offs ) , len ) ;
cp_len + = len ;
out :
return cp_len ;
}
/*
* write the buffer to the inline inode .
* If ' create ' is set , we don ' t need to do the extra copy in the xattr
2021-06-03 05:03:02 +03:00
* value since it is already handled by ext4_xattr_ibody_set .
2012-12-10 23:06:02 +04:00
* That saves us one memcpy .
2012-12-10 23:04:46 +04:00
*/
2014-05-12 18:50:23 +04:00
static void ext4_write_inline_data ( struct inode * inode , struct ext4_iloc * iloc ,
void * buffer , loff_t pos , unsigned int len )
2012-12-10 23:04:46 +04:00
{
struct ext4_xattr_entry * entry ;
struct ext4_xattr_ibody_header * header ;
struct ext4_inode * raw_inode ;
int cp_len = 0 ;
2017-02-05 09:28:48 +03:00
if ( unlikely ( ext4_forced_shutdown ( EXT4_SB ( inode - > i_sb ) ) ) )
return ;
2012-12-10 23:04:46 +04:00
BUG_ON ( ! EXT4_I ( inode ) - > i_inline_off ) ;
BUG_ON ( pos + len > EXT4_I ( inode ) - > i_inline_size ) ;
raw_inode = ext4_raw_inode ( iloc ) ;
buffer + = pos ;
if ( pos < EXT4_MIN_INLINE_DATA_SIZE ) {
cp_len = pos + len > EXT4_MIN_INLINE_DATA_SIZE ?
EXT4_MIN_INLINE_DATA_SIZE - pos : len ;
memcpy ( ( void * ) raw_inode - > i_block + pos , buffer , cp_len ) ;
len - = cp_len ;
buffer + = cp_len ;
pos + = cp_len ;
}
if ( ! len )
return ;
pos - = EXT4_MIN_INLINE_DATA_SIZE ;
header = IHDR ( inode , raw_inode ) ;
entry = ( struct ext4_xattr_entry * ) ( ( void * ) raw_inode +
EXT4_I ( inode ) - > i_inline_off ) ;
memcpy ( ( void * ) IFIRST ( header ) + le16_to_cpu ( entry - > e_value_offs ) + pos ,
buffer , len ) ;
}
/*
 * Turn @inode into an inline-data inode able to hold @len bytes.
 *
 * A "system.data" xattr is inserted whose value covers whatever part of
 * @len exceeds EXT4_MIN_INLINE_DATA_SIZE (an empty value otherwise), the
 * i_block area is zeroed, and the inode flags are switched from EXTENTS to
 * INLINE_DATA. If the xattr cannot be stored for lack of space, the
 * MAY_INLINE_DATA state is cleared.
 *
 * Returns 0 on success or a negative errno.
 */
static int ext4_create_inline_data(handle_t *handle,
				   struct inode *inode, unsigned len)
{
	struct ext4_xattr_ibody_find find = {
		.s = { .not_found = -ENODATA, },
	};
	struct ext4_xattr_info info = {
		.name_index = EXT4_XATTR_INDEX_SYSTEM,
		.name = EXT4_XATTR_SYSTEM_DATA,
	};
	void *value = " ";
	unsigned int xattr_len = 0;
	int err;

	err = ext4_get_inode_loc(inode, &find.iloc);
	if (err)
		return err;

	BUFFER_TRACE(find.iloc.bh, " get_write_access ");
	err = ext4_journal_get_write_access(handle, inode->i_sb, find.iloc.bh,
					    EXT4_JTR_NONE);
	if (err)
		goto release;

	/* Only the part that does not fit in i_block goes into the xattr. */
	if (len > EXT4_MIN_INLINE_DATA_SIZE) {
		value = EXT4_ZERO_XATTR_VALUE;
		xattr_len = len - EXT4_MIN_INLINE_DATA_SIZE;
	}

	/* Insert the xattr entry. */
	info.value = value;
	info.value_len = xattr_len;

	err = ext4_xattr_ibody_find(inode, &info, &find);
	if (err)
		goto release;

	/* The entry must not exist yet when inline data is being created. */
	BUG_ON(!find.s.not_found);

	err = ext4_xattr_ibody_set(handle, inode, &info, &find);
	if (err) {
		if (err == -ENOSPC)
			ext4_clear_inode_state(inode,
					       EXT4_STATE_MAY_INLINE_DATA);
		goto release;
	}

	memset((void *)ext4_raw_inode(&find.iloc)->i_block,
	       0, EXT4_MIN_INLINE_DATA_SIZE);

	EXT4_I(inode)->i_inline_off = (u16)((void *)find.s.here -
				      (void *)ext4_raw_inode(&find.iloc));
	EXT4_I(inode)->i_inline_size = xattr_len + EXT4_MIN_INLINE_DATA_SIZE;
	ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
	ext4_set_inode_flag(inode, EXT4_INODE_INLINE_DATA);

	/* Extra reference taken before marking dirty; ours is dropped below. */
	get_bh(find.iloc.bh);
	err = ext4_mark_iloc_dirty(handle, inode, &find.iloc);

release:
	brelse(find.iloc.bh);
	return err;
}
/*
 * Grow the existing "system.data" xattr so that @inode can hold @len bytes
 * of inline data.
 *
 * If @len already fits in the current i_inline_size nothing is done.
 * Otherwise the current xattr value is read into a zero-filled buffer of
 * the new size and written back with the larger length, after which
 * i_inline_off/i_inline_size are refreshed from the updated entry.
 *
 * Returns 0 on success or a negative errno.
 */
static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
				   unsigned int len)
{
	int error;
	void *value = NULL;
	struct ext4_xattr_ibody_find is = {
		.s = { .not_found = -ENODATA, },
	};
	struct ext4_xattr_info i = {
		.name_index = EXT4_XATTR_INDEX_SYSTEM,
		.name = EXT4_XATTR_SYSTEM_DATA,
	};

	/* If the old space is ok, write the data directly. */
	if (len <= EXT4_I(inode)->i_inline_size)
		return 0;

	error = ext4_get_inode_loc(inode, &is.iloc);
	if (error)
		return error;

	error = ext4_xattr_ibody_find(inode, &i, &is);
	if (error)
		goto out;

	/* Updating only makes sense when the entry already exists. */
	BUG_ON(is.s.not_found);

	/* The xattr only stores what does not fit in i_block. */
	len -= EXT4_MIN_INLINE_DATA_SIZE;
	value = kzalloc(len, GFP_NOFS);
	if (!value) {
		error = -ENOMEM;
		goto out;
	}

	error = ext4_xattr_ibody_get(inode, i.name_index, i.name,
				     value, len);
	/*
	 * Any failure to read the old value (not just -ENODATA) must stop
	 * the update; otherwise the xattr would be rewritten from a buffer
	 * that was never filled, losing the existing inline data.
	 */
	if (error < 0)
		goto out;

	BUFFER_TRACE(is.iloc.bh, " get_write_access ");
	error = ext4_journal_get_write_access(handle, inode->i_sb, is.iloc.bh,
					      EXT4_JTR_NONE);
	if (error)
		goto out;

	/* Update the xattr entry. */
	i.value = value;
	i.value_len = len;

	error = ext4_xattr_ibody_set(handle, inode, &i, &is);
	if (error)
		goto out;

	EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here -
				      (void *)ext4_raw_inode(&is.iloc));
	EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE +
				le32_to_cpu(is.s.here->e_value_size);
	ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);

	/* Extra reference taken before marking dirty; ours is dropped below. */
	get_bh(is.iloc.bh);
	error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);

out:
	kfree(value);
	brelse(is.iloc.bh);
	return error;
}
2014-05-12 18:50:23 +04:00
/*
 * Make sure @inode has inline space for @len bytes, creating the inline
 * data xattr or enlarging the existing one as needed.
 *
 * Returns 0 on success, -ENOSPC when the inode may not use inline data or
 * @len exceeds the maximum inline size, or another negative errno from the
 * create/update helper. The xattr write lock is held around the helper.
 */
static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
				    unsigned int len)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	int no_expand;
	int max_size;
	int err;

	if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
		return -ENOSPC;

	max_size = ext4_get_max_inline_size(inode);
	if (max_size < len)
		return -ENOSPC;

	ext4_write_lock_xattr(inode, &no_expand);

	err = ei->i_inline_off ?
		ext4_update_inline_data(handle, inode, len) :
		ext4_create_inline_data(handle, inode, len);

	ext4_write_unlock_xattr(inode, &no_expand);

	return err;
}
/*
 * Strip the inline data from @inode. The "_nolock" suffix reflects that no
 * xattr lock is taken in here.
 *
 * The "system.data" xattr is set with a NULL value, the i_block area is
 * zeroed both on disk and in the in-memory i_data copy, and the
 * INLINE_DATA flag is cleared. On filesystems with the extents feature,
 * directories, regular files and symlinks get the EXTENTS flag and a fresh
 * extent tree.
 *
 * Returns 0 on success (or if there was no inline data) or a negative
 * errno; -ENODATA is reported as success.
 */
static int ext4_destroy_inline_data_nolock(handle_t *handle,
					   struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_xattr_ibody_find find = {
		.s = { .not_found = 0, },
	};
	struct ext4_xattr_info info = {
		.name_index = EXT4_XATTR_INDEX_SYSTEM,
		.name = EXT4_XATTR_SYSTEM_DATA,
		.value = NULL,
		.value_len = 0,
	};
	umode_t mode;
	int err;

	if (!ei->i_inline_off)
		return 0;

	err = ext4_get_inode_loc(inode, &find.iloc);
	if (err)
		return err;

	err = ext4_xattr_ibody_find(inode, &info, &find);
	if (err)
		goto release;

	BUFFER_TRACE(find.iloc.bh, " get_write_access ");
	err = ext4_journal_get_write_access(handle, inode->i_sb, find.iloc.bh,
					    EXT4_JTR_NONE);
	if (err)
		goto release;

	err = ext4_xattr_ibody_set(handle, inode, &info, &find);
	if (err)
		goto release;

	/* Wipe both the on-disk i_block and its in-memory mirror. */
	memset((void *)ext4_raw_inode(&find.iloc)->i_block,
	       0, EXT4_MIN_INLINE_DATA_SIZE);
	memset(ei->i_data, 0, EXT4_MIN_INLINE_DATA_SIZE);

	mode = inode->i_mode;
	if (ext4_has_feature_extents(inode->i_sb) &&
	    (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode))) {
		ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
		ext4_ext_tree_init(handle, inode);
	}
	ext4_clear_inode_flag(inode, EXT4_INODE_INLINE_DATA);

	/* Extra reference taken before marking dirty; ours is dropped below. */
	get_bh(find.iloc.bh);
	err = ext4_mark_iloc_dirty(handle, inode, &find.iloc);

	/* The in-memory inline state is reset even if marking dirty failed. */
	ei->i_inline_off = 0;
	ei->i_inline_size = 0;
	ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);

release:
	brelse(find.iloc.bh);
	return err == -ENODATA ? 0 : err;
}
2012-12-10 23:04:52 +04:00
static int ext4_read_inline_page ( struct inode * inode , struct page * page )
{
void * kaddr ;
int ret = 0 ;
size_t len ;
struct ext4_iloc iloc ;
BUG_ON ( ! PageLocked ( page ) ) ;
BUG_ON ( ! ext4_has_inline_data ( inode ) ) ;
BUG_ON ( page - > index ) ;
if ( ! EXT4_I ( inode ) - > i_inline_off ) {
ext4_warning ( inode - > i_sb , " inode %lu doesn't have inline data. " ,
inode - > i_ino ) ;
goto out ;
}
ret = ext4_get_inode_loc ( inode , & iloc ) ;
if ( ret )
goto out ;
len = min_t ( size_t , ext4_get_inline_size ( inode ) , i_size_read ( inode ) ) ;
kaddr = kmap_atomic ( page ) ;
ret = ext4_read_inline_data ( inode , kaddr , len , & iloc ) ;
flush_dcache_page ( page ) ;
kunmap_atomic ( kaddr ) ;
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is revert of changes to
PAGE_CAHCE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 15:29:47 +03:00
zero_user_segment ( page , len , PAGE_SIZE ) ;
2012-12-10 23:04:52 +04:00
SetPageUptodate ( page ) ;
brelse ( iloc . bh ) ;
out :
return ret ;
}
2023-03-24 21:01:09 +03:00
int ext4_readpage_inline ( struct inode * inode , struct folio * folio )
2012-12-10 23:04:52 +04:00
{
int ret = 0 ;
down_read ( & EXT4_I ( inode ) - > xattr_sem ) ;
if ( ! ext4_has_inline_data ( inode ) ) {
up_read ( & EXT4_I ( inode ) - > xattr_sem ) ;
return - EAGAIN ;
}
/*
* Current inline data can only exist in the 1 st page ,
* So for all the other pages , just set them uptodate .
*/
2023-03-24 21:01:09 +03:00
if ( ! folio - > index )
ret = ext4_read_inline_page ( inode , & folio - > page ) ;
else if ( ! folio_test_uptodate ( folio ) ) {
folio_zero_segment ( folio , 0 , folio_size ( folio ) ) ;
folio_mark_uptodate ( folio ) ;
2012-12-10 23:04:52 +04:00
}
up_read ( & EXT4_I ( inode ) - > xattr_sem ) ;
2023-03-24 21:01:09 +03:00
folio_unlock ( folio ) ;
2012-12-10 23:04:52 +04:00
return ret > = 0 ? 0 : ret ;
}
2012-12-10 23:05:51 +04:00
static int ext4_convert_inline_data_to_extent ( struct address_space * mapping ,
2022-02-22 18:36:28 +03:00
struct inode * inode )
2012-12-10 23:05:51 +04:00
{
2017-01-12 05:50:46 +03:00
int ret , needed_blocks , no_expand ;
2012-12-10 23:05:51 +04:00
handle_t * handle = NULL ;
int retries = 0 , sem_held = 0 ;
struct page * page = NULL ;
2022-02-22 18:36:28 +03:00
unsigned int flags ;
2012-12-10 23:05:51 +04:00
unsigned from , to ;
struct ext4_iloc iloc ;
if ( ! ext4_has_inline_data ( inode ) ) {
/*
* clear the flag so that no new write
* will trap here again .
*/
ext4_clear_inode_state ( inode , EXT4_STATE_MAY_INLINE_DATA ) ;
return 0 ;
}
needed_blocks = ext4_writepage_trans_blocks ( inode ) ;
ret = ext4_get_inode_loc ( inode , & iloc ) ;
if ( ret )
return ret ;
retry :
2013-02-09 06:59:22 +04:00
handle = ext4_journal_start ( inode , EXT4_HT_WRITE_PAGE , needed_blocks ) ;
2012-12-10 23:05:51 +04:00
if ( IS_ERR ( handle ) ) {
ret = PTR_ERR ( handle ) ;
handle = NULL ;
goto out ;
}
/* We cannot recurse into the filesystem as the transaction is already
* started */
2022-02-22 18:36:28 +03:00
flags = memalloc_nofs_save ( ) ;
2022-02-22 19:25:12 +03:00
page = grab_cache_page_write_begin ( mapping , 0 ) ;
2022-02-22 18:36:28 +03:00
memalloc_nofs_restore ( flags ) ;
2012-12-10 23:05:51 +04:00
if ( ! page ) {
ret = - ENOMEM ;
goto out ;
}
2017-01-12 05:50:46 +03:00
ext4_write_lock_xattr ( inode , & no_expand ) ;
2012-12-10 23:05:51 +04:00
sem_held = 1 ;
/* If some one has already done this for us, just exit. */
if ( ! ext4_has_inline_data ( inode ) ) {
ret = 0 ;
goto out ;
}
from = 0 ;
to = ext4_get_inline_size ( inode ) ;
if ( ! PageUptodate ( page ) ) {
ret = ext4_read_inline_page ( inode , page ) ;
if ( ret < 0 )
goto out ;
}
ret = ext4_destroy_inline_data_nolock ( handle , inode ) ;
if ( ret )
goto out ;
2016-03-09 07:08:10 +03:00
if ( ext4_should_dioread_nolock ( inode ) ) {
ret = __block_write_begin ( page , from , to ,
ext4_get_block_unwritten ) ;
} else
2012-12-10 23:05:51 +04:00
ret = __block_write_begin ( page , from , to , ext4_get_block ) ;
if ( ! ret & & ext4_should_journal_data ( inode ) ) {
2021-08-16 12:57:04 +03:00
ret = ext4_walk_page_buffers ( handle , inode , page_buffers ( page ) ,
2012-12-10 23:05:51 +04:00
from , to , NULL ,
do_journal_get_write_access ) ;
}
if ( ret ) {
unlock_page ( page ) ;
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is revert of changes to
PAGE_CAHCE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 15:29:47 +03:00
put_page ( page ) ;
2014-09-11 19:45:12 +04:00
page = NULL ;
2012-12-10 23:05:51 +04:00
ext4_orphan_add ( handle , inode ) ;
2017-01-12 05:50:46 +03:00
ext4_write_unlock_xattr ( inode , & no_expand ) ;
2012-12-10 23:05:51 +04:00
sem_held = 0 ;
ext4_journal_stop ( handle ) ;
handle = NULL ;
ext4_truncate_failed_write ( inode ) ;
/*
* If truncate failed early the inode might
* still be on the orphan list ; we need to
* make sure the inode is removed from the
* orphan list in that case .
*/
if ( inode - > i_nlink )
ext4_orphan_del ( NULL , inode ) ;
}
if ( ret = = - ENOSPC & & ext4_should_retry_alloc ( inode - > i_sb , & retries ) )
goto retry ;
2014-09-11 19:45:12 +04:00
if ( page )
block_commit_write ( page , from , to ) ;
2012-12-10 23:05:51 +04:00
out :
if ( page ) {
unlock_page ( page ) ;
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is revert of changes to
PAGE_CAHCE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 15:29:47 +03:00
put_page ( page ) ;
2012-12-10 23:05:51 +04:00
}
if ( sem_held )
2017-01-12 05:50:46 +03:00
ext4_write_unlock_xattr ( inode , & no_expand ) ;
2012-12-10 23:05:51 +04:00
if ( handle )
ext4_journal_stop ( handle ) ;
brelse ( iloc . bh ) ;
return ret ;
}
/*
 * Try to set up a write of @len bytes at @pos as an inline-data write.
 *
 * If the write would end beyond the maximum inline size, or the inode has
 * no room left for it, the inline data is converted to block-mapped data
 * instead and the result of that conversion is returned.
 *
 * Otherwise space is reserved in the inode, page 0 of @mapping is grabbed,
 * stored in *@pagep and brought up to date from the inline data.
 *
 * Returns 1 when the inline write has been prepared (the journal handle is
 * deliberately left running in that case), 0 when the inode no longer has
 * inline data or was converted successfully, or a negative errno.
 */
int ext4_try_to_write_inline_data(struct address_space *mapping,
				  struct inode *inode,
				  loff_t pos, unsigned len,
				  struct page **pagep)
{
	struct ext4_iloc iloc;
	struct page *page;
	handle_t *handle;
	unsigned int flags;
	int ret;

	if (pos + len > ext4_get_max_inline_size(inode))
		return ext4_convert_inline_data_to_extent(mapping, inode);

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret)
		return ret;

	/*
	 * The write could happen inside the inode, so try to reserve the
	 * space in the inode first.
	 */
	handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		handle = NULL;
		goto out;
	}

	ret = ext4_prepare_inline_data(handle, inode, pos + len);
	if (ret == -ENOSPC) {
		/* No space in the inline inode, so convert it to extent. */
		ext4_journal_stop(handle);
		brelse(iloc.bh);
		return ext4_convert_inline_data_to_extent(mapping, inode);
	}
	if (ret)
		goto out;

	ret = ext4_journal_get_write_access(handle, inode->i_sb, iloc.bh,
					    EXT4_JTR_NONE);
	if (ret)
		goto out;

	flags = memalloc_nofs_save();
	page = grab_cache_page_write_begin(mapping, 0);
	memalloc_nofs_restore(flags);
	if (!page) {
		ret = -ENOMEM;
		goto out;
	}

	*pagep = page;

	down_read(&EXT4_I(inode)->xattr_sem);

	if (!ext4_has_inline_data(inode)) {
		ret = 0;
		unlock_page(page);
		put_page(page);
		goto out_up_read;
	}

	if (!PageUptodate(page)) {
		ret = ext4_read_inline_page(inode, page);
		if (ret < 0) {
			unlock_page(page);
			put_page(page);
			goto out_up_read;
		}
	}

	/* Success: forget the handle here so it is not stopped below. */
	ret = 1;
	handle = NULL;

out_up_read:
	up_read(&EXT4_I(inode)->xattr_sem);
out:
	if (handle && (ret != 1))
		ext4_journal_stop(handle);
	brelse(iloc.bh);
	return ret;
}
/*
* Finish a write into the inline data area of an inode.
*
* When "copied" is non-zero, the page is mapped and handed to
* ext4_write_inline_data() together with "pos" and "copied" while the
* xattr write lock is held, the page is marked uptodate and clean, and
* the inode size is updated to pos + copied. The page is then unlocked
* and released, and the handle obtained from
* ext4_journal_current_handle() is stopped.
*
* If fewer than "len" bytes were copied and the page is not uptodate,
* the copy is discarded ("copied" is reset to 0). If pos + len is still
* beyond i_size at the end, the inode is put on the orphan list (when it
* can be truncated) and ext4_truncate_failed_write() is called.
*
* Returns "copied" on success, or the first non-zero error from
* ext4_get_inode_loc() / ext4_journal_stop().
*/
int ext4_write_inline_data_end ( struct inode * inode , loff_t pos , unsigned len ,
unsigned copied , struct page * page )
{
2021-07-16 15:20:23 +03:00
handle_t * handle = ext4_journal_current_handle ( ) ;
int no_expand ;
2012-12-10 23:05:51 +04:00
void * kaddr ;
struct ext4_iloc iloc ;
2021-07-16 15:20:23 +03:00
int ret = 0 , ret2 ;
2012-12-10 23:05:51 +04:00
2021-07-16 15:20:22 +03:00
/* Short copy into a page that is not uptodate: treat it as zero bytes. */
if ( unlikely ( copied < len ) & & ! PageUptodate ( page ) )
2021-07-16 15:20:23 +03:00
copied = 0 ;
2012-12-10 23:05:51 +04:00
2021-07-16 15:20:23 +03:00
if ( likely ( copied ) ) {
ret = ext4_get_inode_loc ( inode , & iloc ) ;
if ( ret ) {
/*
* Drop the page here: the jump to "out" skips the common
* unlock_page() / put_page() further down.
*/
unlock_page ( page ) ;
put_page ( page ) ;
ext4_std_error ( inode - > i_sb , ret ) ;
2012-12-10 23:05:51 +04:00
goto out ;
}
2021-07-16 15:20:23 +03:00
ext4_write_lock_xattr ( inode , & no_expand ) ;
BUG_ON ( ! ext4_has_inline_data ( inode ) ) ;
2012-12-10 23:05:51 +04:00
2021-09-05 06:46:32 +03:00
/*
* ei - > i_inline_off may have changed since
* ext4_write_begin ( ) called
* ext4_try_to_write_inline_data ( )
*/
( void ) ext4_find_inline_data_nolock ( inode ) ;
2012-12-10 23:05:51 +04:00
2021-07-16 15:20:23 +03:00
kaddr = kmap_atomic ( page ) ;
ext4_write_inline_data ( inode , & iloc , kaddr , pos , copied ) ;
kunmap_atomic ( kaddr ) ;
SetPageUptodate ( page ) ;
/* clear page dirty so that writepages wouldn't work for us. */
ClearPageDirty ( page ) ;
2012-12-10 23:05:51 +04:00
2021-07-16 15:20:23 +03:00
ext4_write_unlock_xattr ( inode , & no_expand ) ;
brelse ( iloc . bh ) ;
2021-08-21 06:44:17 +03:00
2021-07-16 15:20:23 +03:00
/*
* It ' s important to update i_size while still holding page
* lock : page writeout could otherwise come in and zero
* beyond i_size .
*/
ext4_update_inode_size ( inode , pos + copied ) ;
}
unlock_page ( page ) ;
put_page ( page ) ;
2012-12-10 23:05:51 +04:00
2021-07-16 15:20:23 +03:00
/*
* Don ' t mark the inode dirty under page lock . First , it unnecessarily
* makes the holding time of page lock longer . Second , it forces lock
* ordering of page lock and transaction start for journaling
* filesystems .
*/
if ( likely ( copied ) )
mark_inode_dirty ( inode ) ;
2012-12-10 23:05:51 +04:00
out :
2021-07-16 15:20:23 +03:00
/*
* If we didn ' t copy as much data as expected , we need to trim back
* size of xattr containing inline data .
*/
if ( pos + len > inode - > i_size & & ext4_can_truncate ( inode ) )
ext4_orphan_add ( handle , inode ) ;
2021-07-16 15:20:22 +03:00
2021-07-16 15:20:23 +03:00
ret2 = ext4_journal_stop ( handle ) ;
if ( ! ret )
ret = ret2 ;
if ( pos + len > inode - > i_size ) {
ext4_truncate_failed_write ( inode ) ;
/*
* If truncate failed early the inode might still be
* on the orphan list ; we need to make sure the inode
* is removed from the orphan list in that case .
*/
if ( inode - > i_nlink )
ext4_orphan_del ( NULL , inode ) ;
}
return ret ? ret : copied ;
2012-12-10 23:05:51 +04:00
}
2012-12-10 23:05:57 +04:00
/*
* Write page contents into the inode's inline data area.
*
* Maps "page" and passes it to ext4_write_inline_data() with offset 0
* and length "len" while holding the xattr write lock.
*
* Returns the buffer head obtained from ext4_get_inode_loc(); it is not
* released here, so the caller presumably owns that reference (confirm
* against callers). Returns NULL if the inode location lookup failed.
*/
struct buffer_head *
ext4_journalled_write_inline_data ( struct inode * inode ,
unsigned len ,
struct page * page )
{
2017-01-12 05:50:46 +03:00
int ret , no_expand ;
2012-12-10 23:05:57 +04:00
void * kaddr ;
struct ext4_iloc iloc ;
ret = ext4_get_inode_loc ( inode , & iloc ) ;
if ( ret ) {
ext4_std_error ( inode - > i_sb , ret ) ;
return NULL ;
}
2017-01-12 05:50:46 +03:00
ext4_write_lock_xattr ( inode , & no_expand ) ;
2012-12-10 23:05:57 +04:00
kaddr = kmap_atomic ( page ) ;
ext4_write_inline_data ( inode , & iloc , kaddr , 0 , len ) ;
kunmap_atomic ( kaddr ) ;
2017-01-12 05:50:46 +03:00
ext4_write_unlock_xattr ( inode , & no_expand ) ;
2012-12-10 23:05:57 +04:00
return iloc . bh ;
}
2012-12-10 23:05:57 +04:00
/*
* Try to make the page cache and handle ready for the inline data case .
* We can call this function in 2 cases :
* 1. The inode is created and the first write exceeds inline size . We can
* clear the inode state safely .
* 2. The inode has inline data , then we need to read the data , make it
* uptodate and dirty so that ext4_da_writepages can handle it . We don ' t
2021-03-27 13:30:05 +03:00
* need to start the journal since the file ' s metadata isn ' t changed now .
2012-12-10 23:05:57 +04:00
*
* On success with inline data present, the page at index 0 is marked
* dirty and uptodate, EXT4_STATE_MAY_INLINE_DATA is cleared and *fsdata
* is set to CONVERT_INLINE_DATA.
*
* Returns 0 on success (including when the inode has no inline data),
* -ENOMEM if the page cannot be grabbed, or the error from
* ext4_read_inline_page() / __block_write_begin(). The
* __block_write_begin() failure path drops xattr_sem and the page itself,
* calls ext4_truncate_failed_write() and returns directly instead of
* going through the "out" label. The "if (page)" test at "out" is always
* true, since a NULL page returns early.
*/
static int ext4_da_convert_inline_data_to_extent ( struct address_space * mapping ,
struct inode * inode ,
void * * fsdata )
{
int ret = 0 , inline_size ;
struct page * page ;
2022-02-22 19:25:12 +03:00
page = grab_cache_page_write_begin ( mapping , 0 ) ;
2012-12-10 23:05:57 +04:00
if ( ! page )
return - ENOMEM ;
down_read ( & EXT4_I ( inode ) - > xattr_sem ) ;
if ( ! ext4_has_inline_data ( inode ) ) {
ext4_clear_inode_state ( inode , EXT4_STATE_MAY_INLINE_DATA ) ;
goto out ;
}
inline_size = ext4_get_inline_size ( inode ) ;
if ( ! PageUptodate ( page ) ) {
ret = ext4_read_inline_page ( inode , page ) ;
if ( ret < 0 )
goto out ;
}
ret = __block_write_begin ( page , 0 , inline_size ,
ext4_da_get_block_prep ) ;
if ( ret ) {
2014-12-06 05:37:15 +03:00
up_read ( & EXT4_I ( inode ) - > xattr_sem ) ;
unlock_page ( page ) ;
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is revert of changes to
PAGE_CAHCE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 15:29:47 +03:00
put_page ( page ) ;
2012-12-10 23:05:57 +04:00
ext4_truncate_failed_write ( inode ) ;
2014-12-06 05:37:15 +03:00
return ret ;
2012-12-10 23:05:57 +04:00
}
SetPageDirty ( page ) ;
SetPageUptodate ( page ) ;
ext4_clear_inode_state ( inode , EXT4_STATE_MAY_INLINE_DATA ) ;
* fsdata = ( void * ) CONVERT_INLINE_DATA ;
out :
up_read ( & EXT4_I ( inode ) - > xattr_sem ) ;
if ( page ) {
unlock_page ( page ) ;
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is revert of changes to
PAGE_CAHCE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 15:29:47 +03:00
put_page ( page ) ;
2012-12-10 23:05:57 +04:00
}
return ret ;
}
/*
* Prepare the write for the inline data .
2020-01-23 09:43:25 +03:00
* If the data can be written into the inode , we just read
2012-12-10 23:05:57 +04:00
* the page and make it uptodate , and start the journal .
* Otherwise read the page , makes it dirty so that it can be
* handle in writepages ( the i_disksize update is left to the
* normal ext4_da_write_end ) .
*
* Returns 1 with *pagep set to the grabbed page when the write can go
* into the inline area; on that path the journal handle is left running
* (there is no ext4_journal_stop() before the return). Returns 0 if the
* inode turns out to have no inline data, or if
* ext4_prepare_inline_data() reported -ENOSPC and
* ext4_da_convert_inline_data_to_extent() then succeeded. Otherwise
* returns a negative error. The buffer head from ext4_get_inode_loc() is
* released on every exit path.
*/
int ext4_da_write_inline_data_begin ( struct address_space * mapping ,
struct inode * inode ,
loff_t pos , unsigned len ,
struct page * * pagep ,
void * * fsdata )
{
2022-01-17 15:11:48 +03:00
int ret ;
2012-12-10 23:05:57 +04:00
handle_t * handle ;
struct page * page ;
struct ext4_iloc iloc ;
2018-10-03 04:18:45 +03:00
int retries = 0 ;
2022-02-22 18:36:28 +03:00
unsigned int flags ;
2012-12-10 23:05:57 +04:00
ret = ext4_get_inode_loc ( inode , & iloc ) ;
if ( ret )
return ret ;
2014-01-06 23:02:23 +04:00
retry_journal :
2013-02-09 06:59:22 +04:00
handle = ext4_journal_start ( inode , EXT4_HT_INODE , 1 ) ;
2012-12-10 23:05:57 +04:00
if ( IS_ERR ( handle ) ) {
ret = PTR_ERR ( handle ) ;
goto out ;
}
2022-01-17 15:11:48 +03:00
ret = ext4_prepare_inline_data ( handle , inode , pos + len ) ;
if ( ret & & ret ! = - ENOSPC )
goto out_journal ;
2012-12-10 23:05:57 +04:00
/*
* -ENOSPC from ext4_prepare_inline_data(): stop the handle and fall
* back to the conversion helper, retrying from retry_journal if that
* also reports -ENOSPC and ext4_should_retry_alloc() allows it.
*/
if ( ret = = - ENOSPC ) {
2018-06-17 06:41:59 +03:00
ext4_journal_stop ( handle ) ;
2012-12-10 23:05:57 +04:00
ret = ext4_da_convert_inline_data_to_extent ( mapping ,
inode ,
fsdata ) ;
2014-01-06 23:02:23 +04:00
if ( ret = = - ENOSPC & &
ext4_should_retry_alloc ( inode - > i_sb , & retries ) )
goto retry_journal ;
2012-12-10 23:05:57 +04:00
goto out ;
}
2022-02-22 18:36:28 +03:00
/*
* We cannot recurse into the filesystem as the transaction
* is already started .
*/
flags = memalloc_nofs_save ( ) ;
2022-02-22 19:25:12 +03:00
page = grab_cache_page_write_begin ( mapping , 0 ) ;
2022-02-22 18:36:28 +03:00
memalloc_nofs_restore ( flags ) ;
2012-12-10 23:05:57 +04:00
if ( ! page ) {
ret = - ENOMEM ;
2014-01-06 23:03:23 +04:00
goto out_journal ;
2012-12-10 23:05:57 +04:00
}
down_read ( & EXT4_I ( inode ) - > xattr_sem ) ;
if ( ! ext4_has_inline_data ( inode ) ) {
ret = 0 ;
goto out_release_page ;
}
if ( ! PageUptodate ( page ) ) {
ret = ext4_read_inline_page ( inode , page ) ;
if ( ret < 0 )
goto out_release_page ;
}
2021-08-16 12:57:04 +03:00
ret = ext4_journal_get_write_access ( handle , inode - > i_sb , iloc . bh ,
EXT4_JTR_NONE ) ;
2018-07-10 08:07:43 +03:00
if ( ret )
goto out_release_page ;
2012-12-10 23:05:57 +04:00
up_read ( & EXT4_I ( inode ) - > xattr_sem ) ;
* pagep = page ;
brelse ( iloc . bh ) ;
return 1 ;
out_release_page :
up_read ( & EXT4_I ( inode ) - > xattr_sem ) ;
unlock_page ( page ) ;
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time
ago with promise that one day it will be possible to implement page
cache with bigger chunks than PAGE_SIZE.
This promise never materialized. And unlikely will.
We have many places where PAGE_CACHE_SIZE assumed to be equal to
PAGE_SIZE. And it's constant source of confusion on whether
PAGE_CACHE_* or PAGE_* constant should be used in a particular case,
especially on the border between fs and mm.
Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much
breakage to be doable.
Let's stop pretending that pages in page cache are special. They are
not.
The changes are pretty straight-forward:
- <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>;
- PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN};
- page_cache_get() -> get_page();
- page_cache_release() -> put_page();
This patch contains automated changes generated with coccinelle using
script below. For some reason, coccinelle doesn't patch header files.
I've called spatch for them manually.
The only adjustment after coccinelle is revert of changes to
PAGE_CAHCE_ALIGN definition: we are going to drop it later.
There are few places in the code where coccinelle didn't reach. I'll
fix them manually in a separate patch. Comments and documentation also
will be addressed with the separate patch.
virtual patch
@@
expression E;
@@
- E << (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
expression E;
@@
- E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT)
+ E
@@
@@
- PAGE_CACHE_SHIFT
+ PAGE_SHIFT
@@
@@
- PAGE_CACHE_SIZE
+ PAGE_SIZE
@@
@@
- PAGE_CACHE_MASK
+ PAGE_MASK
@@
expression E;
@@
- PAGE_CACHE_ALIGN(E)
+ PAGE_ALIGN(E)
@@
expression E;
@@
- page_cache_get(E)
+ get_page(E)
@@
expression E;
@@
- page_cache_release(E)
+ put_page(E)
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 15:29:47 +03:00
put_page ( page ) ;
2014-01-06 23:03:23 +04:00
out_journal :
ext4_journal_stop ( handle ) ;
2012-12-10 23:05:57 +04:00
out :
brelse ( iloc . bh ) ;
return ret ;
}
2012-12-10 23:05:59 +04:00
/*
* Debug helper, only built when INLINE_DIR_DEBUG is defined: walks the
* directory entries in [inline_start, inline_start + inline_size),
* emits one trace_printk() line per entry and calls BUG() if
* ext4_check_dir_entry() returns non-zero for any of them. Without
* INLINE_DIR_DEBUG the name is a macro that expands to nothing.
*/
# ifdef INLINE_DIR_DEBUG
void ext4_show_inline_dir ( struct inode * dir , struct buffer_head * bh ,
void * inline_start , int inline_size )
{
int offset ;
unsigned short de_len ;
struct ext4_dir_entry_2 * de = inline_start ;
void * dlimit = inline_start + inline_size ;
trace_printk ( " inode %lu \n " , dir - > i_ino ) ;
offset = 0 ;
while ( ( void * ) de < dlimit ) {
de_len = ext4_rec_len_from_disk ( de - > rec_len , inline_size ) ;
2015-04-02 23:42:43 +03:00
trace_printk ( " de: off %u rlen %u name %.*s nlen %u ino %u \n " ,
2012-12-10 23:05:59 +04:00
offset , de_len , de - > name_len , de - > name ,
de - > name_len , le32_to_cpu ( de - > inode ) ) ;
if ( ext4_check_dir_entry ( dir , NULL , de , bh ,
inline_start , inline_size , offset ) )
BUG ( ) ;
/* Step to the next entry using the record length just decoded. */
offset + = de_len ;
de = ( struct ext4_dir_entry_2 * ) ( ( char * ) de + de_len ) ;
}
}
# else
# define ext4_show_inline_dir(dir, bh, inline_start, inline_size)
# endif
/*
* Add a new entry into a inline dir .
* It will return - ENOSPC if no space is available , and - EIO
* and - EEXIST if directory entry already exists .
*
* Returns 1 when the entry was inserted. Any non-zero result from
* ext4_find_dest_de() or ext4_journal_get_write_access() is returned
* unchanged (the error codes listed above presumably come from
* ext4_find_dest_de(), which is not visible here).
*
* On success the directory's mtime and ctime are set to the current
* time and its version counter is incremented. The inode is not marked
* dirty here; ext4_try_add_inline_entry() does that after calling this
* function.
*/
static int ext4_add_dirent_to_inline ( handle_t * handle ,
2015-05-18 20:14:47 +03:00
struct ext4_filename * fname ,
2016-01-09 00:00:31 +03:00
struct inode * dir ,
2012-12-10 23:05:59 +04:00
struct inode * inode ,
struct ext4_iloc * iloc ,
void * inline_start , int inline_size )
{
int err ;
struct ext4_dir_entry_2 * de ;
2015-05-18 20:14:47 +03:00
/* Locate the slot ("de") in the inline area that will hold the entry. */
err = ext4_find_dest_de ( dir , inode , iloc - > bh , inline_start ,
inline_size , fname , & de ) ;
2012-12-10 23:05:59 +04:00
if ( err )
return err ;
2014-05-13 06:06:43 +04:00
BUFFER_TRACE ( iloc - > bh , " get_write_access " ) ;
2021-08-16 12:57:04 +03:00
err = ext4_journal_get_write_access ( handle , dir - > i_sb , iloc - > bh ,
EXT4_JTR_NONE ) ;
2012-12-10 23:05:59 +04:00
if ( err )
return err ;
2021-03-19 10:34:13 +03:00
ext4_insert_dentry ( dir , inode , de , inline_size , fname ) ;
2012-12-10 23:05:59 +04:00
ext4_show_inline_dir ( dir , iloc - > bh , inline_start , inline_size ) ;
/*
* XXX shouldn ' t update any times until successful
* completion of syscall , but too many callers depend
* on this .
*
* XXX similarly , too many callers depend on
* ext4_new_inode ( ) setting the times , but error
* recovery deletes the inode , so the worst that can
* happen is that the times are slightly out of date
* and / or different from the directory change time .
*/
2016-11-15 05:40:10 +03:00
dir - > i_mtime = dir - > i_ctime = current_time ( dir ) ;
2012-12-10 23:05:59 +04:00
ext4_update_dx_flag ( dir ) ;
2018-01-09 16:21:39 +03:00
inode_inc_iversion ( dir ) ;
2012-12-10 23:05:59 +04:00
return 1 ;
}
/*
* Return a pointer to the value of the inline-data xattr inside the raw
* inode described by "iloc".
*
* The xattr entry is found at i_inline_off bytes from the start of the
* raw inode; the value lives at IFIRST(header) plus the entry's
* e_value_offs. Triggers BUG_ON() if i_inline_off is zero.
*/
static void * ext4_get_inline_xattr_pos ( struct inode * inode ,
struct ext4_iloc * iloc )
{
struct ext4_xattr_entry * entry ;
struct ext4_xattr_ibody_header * header ;
BUG_ON ( ! EXT4_I ( inode ) - > i_inline_off ) ;
header = IHDR ( inode , ext4_raw_inode ( iloc ) ) ;
entry = ( struct ext4_xattr_entry * ) ( ( void * ) ext4_raw_inode ( iloc ) +
EXT4_I ( inode ) - > i_inline_off ) ;
return ( void * ) IFIRST ( header ) + le16_to_cpu ( entry - > e_value_offs ) ;
}
/*
* Set the final de to cover the whole block.
*
* de_buf holds directory entries that currently span old_size bytes and
* should span new_size bytes afterwards.
*
* If old_size is non-zero, the entries are walked by rec_len until the
* end of the old area is reached, and the last entry's rec_len is grown
* by (new_size - old_size). If old_size is zero, a single empty entry
* (inode 0) with rec_len covering new_size is written at de_buf.
*/
static void ext4_update_final_de ( void * de_buf , int old_size , int new_size )
{
struct ext4_dir_entry_2 * de , * prev_de ;
void * limit ;
int de_len ;
2022-04-01 11:13:21 +03:00
de = de_buf ;
2012-12-10 23:05:59 +04:00
if ( old_size ) {
limit = de_buf + old_size ;
/* After the loop prev_de is the last entry and de_len its length. */
do {
prev_de = de ;
de_len = ext4_rec_len_from_disk ( de - > rec_len , old_size ) ;
de_buf + = de_len ;
2022-04-01 11:13:21 +03:00
de = de_buf ;
2012-12-10 23:05:59 +04:00
} while ( de_buf < limit ) ;
prev_de - > rec_len = ext4_rec_len_to_disk ( de_len + new_size -
old_size , new_size ) ;
} else {
/* this is just created, so create an empty entry. */
de - > inode = 0 ;
de - > rec_len = ext4_rec_len_to_disk ( new_size , new_size ) ;
}
}
/*
* Grow the xattr part of an inline directory to the maximum size
* reported by get_max_inline_xattr_value_size().
*
* old_size is the part of i_inline_size beyond
* EXT4_MIN_INLINE_DATA_SIZE. After a successful
* ext4_update_inline_data() the final directory entry in the xattr area
* is stretched to the new size and i_size / i_disksize are set to the
* new i_inline_size.
*
* Returns 0 on success, -ENOSPC if growing would gain no more than
* ext4_dir_rec_len(1, NULL) bytes, or the error from
* ext4_update_inline_data().
*/
static int ext4_update_inline_dir ( handle_t * handle , struct inode * dir ,
struct ext4_iloc * iloc )
{
int ret ;
int old_size = EXT4_I ( dir ) - > i_inline_size - EXT4_MIN_INLINE_DATA_SIZE ;
int new_size = get_max_inline_xattr_value_size ( dir , iloc ) ;
2021-03-19 10:34:13 +03:00
if ( new_size - old_size < = ext4_dir_rec_len ( 1 , NULL ) )
2012-12-10 23:05:59 +04:00
return - ENOSPC ;
ret = ext4_update_inline_data ( handle , dir ,
new_size + EXT4_MIN_INLINE_DATA_SIZE ) ;
if ( ret )
return ret ;
/* The new size is re-read from i_inline_size rather than new_size. */
ext4_update_final_de ( ext4_get_inline_xattr_pos ( dir , iloc ) , old_size ,
EXT4_I ( dir ) - > i_inline_size -
EXT4_MIN_INLINE_DATA_SIZE ) ;
dir - > i_size = EXT4_I ( dir ) - > i_disksize = EXT4_I ( dir ) - > i_inline_size ;
return 0 ;
}
/*
* Put previously saved inline data back into the inode.
*
* Re-creates an inline data area of inline_size bytes with
* ext4_create_inline_data(), writes "buf" into it and sets
* EXT4_STATE_MAY_INLINE_DATA again.
*
* Nothing is returned: if re-creating the inline area fails, a
* KERN_EMERG message is logged and the data in "buf" is not written
* back.
*/
static void ext4_restore_inline_data ( handle_t * handle , struct inode * inode ,
struct ext4_iloc * iloc ,
void * buf , int inline_size )
{
2022-01-17 15:11:47 +03:00
int ret ;
ret = ext4_create_inline_data ( handle , inode , inline_size ) ;
if ( ret ) {
ext4_msg ( inode - > i_sb , KERN_EMERG ,
" error restoring inline_data for inode -- potential data loss! (inode %lu, error %d) " ,
inode - > i_ino , ret ) ;
return ;
}
2012-12-10 23:05:59 +04:00
ext4_write_inline_data ( inode , iloc , buf , 0 , inline_size ) ;
ext4_set_inode_state ( inode , EXT4_STATE_MAY_INLINE_DATA ) ;
}
/*
* Finish converting an inline directory into a regular directory block.
*
* Writes "." and ".." at the start of dir_block via
* ext4_init_dot_dotdot(), copies the inline entries that follow the
* first EXT4_INLINE_DOTDOT_SIZE bytes of "buf" behind them, sets the
* inode size and i_disksize to one filesystem block, stretches the final
* entry to the end of the block (minus the checksum tail when metadata
* checksums are enabled) and dirties the block and the inode.
*
* Returns the error from ext4_handle_dirty_dirblock() if it fails,
* otherwise the result of ext4_mark_inode_dirty().
*
* NOTE(review): csum_size is still 0 when it is passed to
* ext4_init_dot_dotdot(); it is only computed afterwards. Also,
* inode->i_size is assigned directly and then set again to the same
* value through i_size_write(). Confirm both are intended.
*/
static int ext4_finish_convert_inline_dir ( handle_t * handle ,
struct inode * inode ,
struct buffer_head * dir_block ,
void * buf ,
int inline_size )
{
int err , csum_size = 0 , header_size = 0 ;
struct ext4_dir_entry_2 * de ;
void * target = dir_block - > b_data ;
/*
* First create " . " and " .. " and then copy the dir information
* back to the block .
*/
2022-04-01 11:13:21 +03:00
de = target ;
2012-12-10 23:05:59 +04:00
de = ext4_init_dot_dotdot ( inode , de ,
inode - > i_sb - > s_blocksize , csum_size ,
le32_to_cpu ( ( ( struct ext4_dir_entry_2 * ) buf ) - > inode ) , 1 ) ;
/* Bytes taken by the entries that ext4_init_dot_dotdot() just wrote. */
header_size = ( void * ) de - target ;
memcpy ( ( void * ) de , buf + EXT4_INLINE_DOTDOT_SIZE ,
inline_size - EXT4_INLINE_DOTDOT_SIZE ) ;
2014-10-13 11:36:16 +04:00
if ( ext4_has_metadata_csum ( inode - > i_sb ) )
2012-12-10 23:05:59 +04:00
csum_size = sizeof ( struct ext4_dir_entry_tail ) ;
inode - > i_size = inode - > i_sb - > s_blocksize ;
i_size_write ( inode , inode - > i_sb - > s_blocksize ) ;
EXT4_I ( inode ) - > i_disksize = inode - > i_sb - > s_blocksize ;
ext4_update_final_de ( dir_block - > b_data ,
inline_size - EXT4_INLINE_DOTDOT_SIZE + header_size ,
inode - > i_sb - > s_blocksize - csum_size ) ;
2019-06-21 23:31:47 +03:00
if ( csum_size )
ext4_initialize_dirent_tail ( dir_block ,
inode - > i_sb - > s_blocksize ) ;
2012-12-10 23:05:59 +04:00
set_buffer_uptodate ( dir_block ) ;
2019-06-21 22:49:26 +03:00
err = ext4_handle_dirty_dirblock ( handle , inode , dir_block ) ;
2012-12-10 23:05:59 +04:00
if ( err )
2017-03-15 21:52:02 +03:00
return err ;
2012-12-10 23:05:59 +04:00
set_buffer_verified ( dir_block ) ;
2017-03-15 21:52:02 +03:00
return ext4_mark_inode_dirty ( handle , inode ) ;
2012-12-10 23:05:59 +04:00
}
/*
* Move an inode's inline data into a newly mapped block at logical
* block 0.
*
* The inline contents are first read into a temporary buffer (and, for
* directories, validated with ext4_check_all_de()). Then the inline data
* is destroyed, block 0 is mapped with EXT4_GET_BLOCKS_CREATE and the
* saved contents are written into it. Directories go through
* ext4_finish_convert_inline_dir(); other inodes get a plain memcpy into
* the zeroed block.
*
* If anything fails after the inline data has been destroyed, the saved
* buffer is written back with ext4_restore_inline_data().
*
* Takes no lock itself; the caller visible in this file,
* ext4_try_add_inline_entry(), holds the xattr write lock around it.
*
* Returns 0 on success or a negative error. A failure of
* ext4_journal_get_create_access() is reported as -EIO rather than with
* its own error code.
*/
static int ext4_convert_inline_data_nolock ( handle_t * handle ,
struct inode * inode ,
struct ext4_iloc * iloc )
{
int error ;
void * buf = NULL ;
struct buffer_head * data_bh = NULL ;
struct ext4_map_blocks map ;
int inline_size ;
inline_size = ext4_get_inline_size ( inode ) ;
buf = kmalloc ( inline_size , GFP_NOFS ) ;
if ( ! buf ) {
error = - ENOMEM ;
goto out ;
}
error = ext4_read_inline_data ( inode , buf , inline_size , iloc ) ;
if ( error < 0 )
goto out ;
2014-07-28 21:06:26 +04:00
/*
* Make sure the inline directory entries pass checks before we try to
* convert them , so that we avoid touching stuff that needs fsck .
*/
if ( S_ISDIR ( inode - > i_mode ) ) {
error = ext4_check_all_de ( inode , iloc - > bh ,
buf + EXT4_INLINE_DOTDOT_SIZE ,
inline_size - EXT4_INLINE_DOTDOT_SIZE ) ;
if ( error )
goto out ;
}
2012-12-10 23:05:59 +04:00
/* From here on, failures jump to out_restore to put "buf" back. */
error = ext4_destroy_inline_data_nolock ( handle , inode ) ;
if ( error )
goto out ;
map . m_lblk = 0 ;
map . m_len = 1 ;
map . m_flags = 0 ;
error = ext4_map_blocks ( handle , inode , & map , EXT4_GET_BLOCKS_CREATE ) ;
if ( error < 0 )
goto out_restore ;
if ( ! ( map . m_flags & EXT4_MAP_MAPPED ) ) {
error = - EIO ;
goto out_restore ;
}
data_bh = sb_getblk ( inode - > i_sb , map . m_pblk ) ;
if ( ! data_bh ) {
2013-01-13 01:19:36 +04:00
error = - ENOMEM ;
2012-12-10 23:05:59 +04:00
goto out_restore ;
}
lock_buffer ( data_bh ) ;
2021-08-16 12:57:04 +03:00
error = ext4_journal_get_create_access ( handle , inode - > i_sb , data_bh ,
EXT4_JTR_NONE ) ;
2012-12-10 23:05:59 +04:00
if ( error ) {
unlock_buffer ( data_bh ) ;
error = - EIO ;
goto out_restore ;
}
memset ( data_bh - > b_data , 0 , inode - > i_sb - > s_blocksize ) ;
if ( ! S_ISDIR ( inode - > i_mode ) ) {
memcpy ( data_bh - > b_data , buf , inline_size ) ;
set_buffer_uptodate ( data_bh ) ;
error = ext4_handle_dirty_metadata ( handle ,
inode , data_bh ) ;
} else {
error = ext4_finish_convert_inline_dir ( handle , inode , data_bh ,
buf , inline_size ) ;
}
unlock_buffer ( data_bh ) ;
/* The success path also falls through here; it restores only on error. */
out_restore :
if ( error )
ext4_restore_inline_data ( handle , inode , iloc , buf , inline_size ) ;
out :
brelse ( data_bh ) ;
kfree ( buf ) ;
return error ;
}
/*
* Try to add the new entry to the inline data of directory "dir".
*
* The entry is first tried in the i_block area (after the first
* EXT4_INLINE_DOTDOT_SIZE bytes), then in the inline xattr space, which
* is grown via ext4_update_inline_dir() if it is currently empty. If
* both report -ENOSPC, the inline directory is converted to a regular
* block with ext4_convert_inline_data_nolock(); the entry itself is not
* added in that case.
*
* The whole operation runs under the xattr write lock, and the
* directory inode is marked dirty on every path past the inode location
* lookup.
*
* Returns 1 if the entry was inserted inline, 0 if the directory has no
* inline data or was successfully converted to a block, or a negative
* error. An error from ext4_mark_inode_dirty() is returned only when
* the result would otherwise be 0.
*/
2015-05-18 20:14:47 +03:00
int ext4_try_add_inline_entry ( handle_t * handle , struct ext4_filename * fname ,
2016-01-09 00:00:31 +03:00
struct inode * dir , struct inode * inode )
2012-12-10 23:05:59 +04:00
{
2020-04-27 04:34:37 +03:00
int ret , ret2 , inline_size , no_expand ;
2012-12-10 23:05:59 +04:00
void * inline_start ;
struct ext4_iloc iloc ;
ret = ext4_get_inode_loc ( dir , & iloc ) ;
if ( ret )
return ret ;
2017-01-12 05:50:46 +03:00
ext4_write_lock_xattr ( dir , & no_expand ) ;
2012-12-10 23:05:59 +04:00
if ( ! ext4_has_inline_data ( dir ) )
goto out ;
inline_start = ( void * ) ext4_raw_inode ( & iloc ) - > i_block +
EXT4_INLINE_DOTDOT_SIZE ;
inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE ;
2016-01-09 00:00:31 +03:00
ret = ext4_add_dirent_to_inline ( handle , fname , dir , inode , & iloc ,
2012-12-10 23:05:59 +04:00
inline_start , inline_size ) ;
/* Anything other than -ENOSPC (success or a hard error) ends here. */
if ( ret ! = - ENOSPC )
goto out ;
/* check whether it can be inserted to inline xattr space. */
inline_size = EXT4_I ( dir ) - > i_inline_size -
EXT4_MIN_INLINE_DATA_SIZE ;
if ( ! inline_size ) {
/* Try to use the xattr space.*/
ret = ext4_update_inline_dir ( handle , dir , & iloc ) ;
if ( ret & & ret ! = - ENOSPC )
goto out ;
inline_size = EXT4_I ( dir ) - > i_inline_size -
EXT4_MIN_INLINE_DATA_SIZE ;
}
if ( inline_size ) {
inline_start = ext4_get_inline_xattr_pos ( dir , & iloc ) ;
2016-01-09 00:00:31 +03:00
ret = ext4_add_dirent_to_inline ( handle , fname , dir ,
2015-05-18 20:14:47 +03:00
inode , & iloc , inline_start ,
inline_size ) ;
2012-12-10 23:05:59 +04:00
if ( ret ! = - ENOSPC )
goto out ;
}
/*
* The inline space is filled up , so create a new block for it .
* As the extent tree will be created , we have to save the inline
* dir first .
*/
ret = ext4_convert_inline_data_nolock ( handle , dir , & iloc ) ;
out :
2017-01-12 05:50:46 +03:00
ext4_write_unlock_xattr ( dir , & no_expand ) ;
2020-04-27 04:34:37 +03:00
ret2 = ext4_mark_inode_dirty ( handle , dir ) ;
if ( unlikely ( ret2 & & ! ret ) )
ret = ret2 ;
2012-12-10 23:05:59 +04:00
brelse ( iloc . bh ) ;
return ret ;
}
2013-04-20 01:53:09 +04:00
/*
* This function fills a red-black tree with information from an
* inlined dir.
*
* The inline data is copied into a temporary buffer under xattr_sem and
* then walked without the lock. "." and ".." are not stored as real
* entries in an inline dir, so they are synthesized in a local fake
* entry. Entries whose hash is below (start_hash, start_minor_hash) and
* entries with inode number 0 are skipped.
*
* Returns the number of directory entries stored in the tree, or a
* negative error code. If the inode has no inline data,
* *has_inline_data is set to 0 and 0 is returned. If
* ext4_check_dir_entry() rejects an entry, the walk stops and the count
* accumulated so far is returned.
*
* The "block" parameter is not referenced in this function body.
*/
2019-06-22 04:57:00 +03:00
int ext4_inlinedir_to_tree ( struct file * dir_file ,
struct inode * dir , ext4_lblk_t block ,
struct dx_hash_info * hinfo ,
__u32 start_hash , __u32 start_minor_hash ,
int * has_inline_data )
2013-04-20 01:53:09 +04:00
{
int err = 0 , count = 0 ;
unsigned int parent_ino ;
int pos ;
struct ext4_dir_entry_2 * de ;
struct inode * inode = file_inode ( dir_file ) ;
int ret , inline_size = 0 ;
struct ext4_iloc iloc ;
void * dir_buf = NULL ;
struct ext4_dir_entry_2 fake ;
2016-07-10 21:01:03 +03:00
struct fscrypt_str tmp_str ;
2013-04-20 01:53:09 +04:00
ret = ext4_get_inode_loc ( inode , & iloc ) ;
if ( ret )
return ret ;
down_read ( & EXT4_I ( inode ) - > xattr_sem ) ;
if ( ! ext4_has_inline_data ( inode ) ) {
up_read ( & EXT4_I ( inode ) - > xattr_sem ) ;
* has_inline_data = 0 ;
goto out ;
}
inline_size = ext4_get_inline_size ( inode ) ;
dir_buf = kmalloc ( inline_size , GFP_NOFS ) ;
if ( ! dir_buf ) {
ret = - ENOMEM ;
up_read ( & EXT4_I ( inode ) - > xattr_sem ) ;
goto out ;
}
ret = ext4_read_inline_data ( inode , dir_buf , inline_size , & iloc ) ;
up_read ( & EXT4_I ( inode ) - > xattr_sem ) ;
if ( ret < 0 )
goto out ;
pos = 0 ;
/* The first 4 bytes of the inline area hold the parent inode number. */
parent_ino = le32_to_cpu ( ( ( struct ext4_dir_entry_2 * ) dir_buf ) - > inode ) ;
while ( pos < inline_size ) {
/*
* As inlined dir doesn ' t store any information about ' . ' and
* only the inode number of ' . . ' is stored , we have to handle
* them differently .
*/
if ( pos = = 0 ) {
fake . inode = cpu_to_le32 ( inode - > i_ino ) ;
fake . name_len = 1 ;
strcpy ( fake . name , " . " ) ;
fake . rec_len = ext4_rec_len_to_disk (
2021-03-19 10:34:13 +03:00
ext4_dir_rec_len ( fake . name_len , NULL ) ,
inline_size ) ;
2013-04-20 01:53:09 +04:00
ext4_set_de_type ( inode - > i_sb , & fake , S_IFDIR ) ;
de = & fake ;
pos = EXT4_INLINE_DOTDOT_OFFSET ;
} else if ( pos = = EXT4_INLINE_DOTDOT_OFFSET ) {
fake . inode = cpu_to_le32 ( parent_ino ) ;
fake . name_len = 2 ;
strcpy ( fake . name , " .. " ) ;
fake . rec_len = ext4_rec_len_to_disk (
2021-03-19 10:34:13 +03:00
ext4_dir_rec_len ( fake . name_len , NULL ) ,
inline_size ) ;
2013-04-20 01:53:09 +04:00
ext4_set_de_type ( inode - > i_sb , & fake , S_IFDIR ) ;
de = & fake ;
pos = EXT4_INLINE_DOTDOT_SIZE ;
} else {
de = ( struct ext4_dir_entry_2 * ) ( dir_buf + pos ) ;
/*
* NOTE(review): pos is advanced before the check, so the
* offset handed to ext4_check_dir_entry() is that of the
* following entry - confirm this is intended.
*/
pos + = ext4_rec_len_from_disk ( de - > rec_len , inline_size ) ;
if ( ext4_check_dir_entry ( inode , dir_file , de ,
iloc . bh , dir_buf ,
inline_size , pos ) ) {
ret = count ;
goto out ;
}
}
2021-03-19 10:34:13 +03:00
if ( ext4_hash_in_dirent ( dir ) ) {
hinfo - > hash = EXT4_DIRENT_HASH ( de ) ;
hinfo - > minor_hash = EXT4_DIRENT_MINOR_HASH ( de ) ;
} else {
ext4fs_dirhash ( dir , de - > name , de - > name_len , hinfo ) ;
}
2013-04-20 01:53:09 +04:00
if ( ( hinfo - > hash < start_hash ) | |
( ( hinfo - > hash = = start_hash ) & &
( hinfo - > minor_hash < start_minor_hash ) ) )
continue ;
if ( de - > inode = = 0 )
continue ;
2015-04-12 07:56:26 +03:00
tmp_str . name = de - > name ;
tmp_str . len = de - > name_len ;
err = ext4_htree_store_dirent ( dir_file , hinfo - > hash ,
hinfo - > minor_hash , de , & tmp_str ) ;
2013-04-20 01:53:09 +04:00
if ( err ) {
2019-08-12 21:29:38 +03:00
ret = err ;
2013-04-20 01:53:09 +04:00
goto out ;
}
count + + ;
}
ret = count ;
out :
kfree ( dir_buf ) ;
brelse ( iloc . bh ) ;
return ret ;
}
2013-04-20 01:55:33 +04:00
/*
* So this function is called when the volume is mkfsed with
* dir_index disabled . In order to keep f_pos persistent
* after we convert from an inlined dir to a block based one ,
* we just pretend that we are a normal dir and return the
* offset as if ' . ' and ' . . ' really take place .
*
*/
2013-05-18 00:08:53 +04:00
int ext4_read_inline_dir ( struct file * file ,
struct dir_context * ctx ,
2012-12-10 23:05:59 +04:00
int * has_inline_data )
{
unsigned int offset , parent_ino ;
2013-05-18 00:08:53 +04:00
int i ;
2012-12-10 23:05:59 +04:00
struct ext4_dir_entry_2 * de ;
struct super_block * sb ;
2013-05-18 00:08:53 +04:00
struct inode * inode = file_inode ( file ) ;
2012-12-10 23:05:59 +04:00
int ret , inline_size = 0 ;
struct ext4_iloc iloc ;
void * dir_buf = NULL ;
2013-04-20 01:55:33 +04:00
int dotdot_offset , dotdot_size , extra_offset , extra_size ;
2012-12-10 23:05:59 +04:00
ret = ext4_get_inode_loc ( inode , & iloc ) ;
if ( ret )
return ret ;
down_read ( & EXT4_I ( inode ) - > xattr_sem ) ;
if ( ! ext4_has_inline_data ( inode ) ) {
up_read ( & EXT4_I ( inode ) - > xattr_sem ) ;
* has_inline_data = 0 ;
goto out ;
}
inline_size = ext4_get_inline_size ( inode ) ;
dir_buf = kmalloc ( inline_size , GFP_NOFS ) ;
if ( ! dir_buf ) {
ret = - ENOMEM ;
up_read ( & EXT4_I ( inode ) - > xattr_sem ) ;
goto out ;
}
ret = ext4_read_inline_data ( inode , dir_buf , inline_size , & iloc ) ;
up_read ( & EXT4_I ( inode ) - > xattr_sem ) ;
if ( ret < 0 )
goto out ;
2013-10-30 16:07:20 +04:00
ret = 0 ;
2012-12-10 23:05:59 +04:00
sb = inode - > i_sb ;
parent_ino = le32_to_cpu ( ( ( struct ext4_dir_entry_2 * ) dir_buf ) - > inode ) ;
2013-05-18 00:08:53 +04:00
offset = ctx - > pos ;
2013-04-20 01:55:33 +04:00
/*
* dotdot_offset and dotdot_size is the real offset and
* size for " .. " and " . " if the dir is block based while
* the real size for them are only EXT4_INLINE_DOTDOT_SIZE .
* So we will use extra_offset and extra_size to indicate them
* during the inline dir iteration .
*/
2021-03-19 10:34:13 +03:00
dotdot_offset = ext4_dir_rec_len ( 1 , NULL ) ;
dotdot_size = dotdot_offset + ext4_dir_rec_len ( 2 , NULL ) ;
2013-04-20 01:55:33 +04:00
extra_offset = dotdot_size - EXT4_INLINE_DOTDOT_SIZE ;
extra_size = extra_offset + inline_size ;
2012-12-10 23:05:59 +04:00
2013-05-18 00:08:53 +04:00
/*
* If the version has changed since the last call to
* readdir ( 2 ) , then we might be pointing to an invalid
* dirent right now . Scan from the start of the inline
* dir to make sure .
*/
2018-02-01 16:15:25 +03:00
if ( ! inode_eq_iversion ( inode , file - > f_version ) ) {
2013-05-18 00:08:53 +04:00
for ( i = 0 ; i < extra_size & & i < offset ; ) {
/*
* " . " is with offset 0 and
* " .. " is dotdot_offset .
*/
if ( ! i ) {
i = dotdot_offset ;
continue ;
} else if ( i = = dotdot_offset ) {
i = dotdot_size ;
2013-04-20 01:55:33 +04:00
continue ;
}
2013-05-18 00:08:53 +04:00
/* for other entry, the real offset in
* the buf has to be tuned accordingly .
*/
de = ( struct ext4_dir_entry_2 * )
( dir_buf + i - extra_offset ) ;
/* It's too expensive to do a full
* dirent test each time round this
* loop , but we do have to test at
* least that it is non - zero . A
* failure will be detected in the
* dirent test below . */
if ( ext4_rec_len_from_disk ( de - > rec_len , extra_size )
2021-03-19 10:34:13 +03:00
< ext4_dir_rec_len ( 1 , NULL ) )
2013-05-18 00:08:53 +04:00
break ;
i + = ext4_rec_len_from_disk ( de - > rec_len ,
extra_size ) ;
}
offset = i ;
ctx - > pos = offset ;
2018-01-09 16:21:39 +03:00
file - > f_version = inode_query_iversion ( inode ) ;
2013-05-18 00:08:53 +04:00
}
2012-12-10 23:05:59 +04:00
2013-05-18 00:08:53 +04:00
while ( ctx - > pos < extra_size ) {
if ( ctx - > pos = = 0 ) {
if ( ! dir_emit ( ctx , " . " , 1 , inode - > i_ino , DT_DIR ) )
goto out ;
ctx - > pos = dotdot_offset ;
continue ;
}
2012-12-10 23:05:59 +04:00
2013-05-18 00:08:53 +04:00
if ( ctx - > pos = = dotdot_offset ) {
if ( ! dir_emit ( ctx , " .. " , 2 , parent_ino , DT_DIR ) )
goto out ;
ctx - > pos = dotdot_size ;
continue ;
}
2012-12-10 23:05:59 +04:00
2013-05-18 00:08:53 +04:00
de = ( struct ext4_dir_entry_2 * )
( dir_buf + ctx - > pos - extra_offset ) ;
if ( ext4_check_dir_entry ( inode , file , de , iloc . bh , dir_buf ,
extra_size , ctx - > pos ) )
goto out ;
if ( le32_to_cpu ( de - > inode ) ) {
if ( ! dir_emit ( ctx , de - > name , de - > name_len ,
le32_to_cpu ( de - > inode ) ,
get_dtype ( sb , de - > file_type ) ) )
2012-12-10 23:05:59 +04:00
goto out ;
}
2013-05-18 00:08:53 +04:00
ctx - > pos + = ext4_rec_len_from_disk ( de - > rec_len , extra_size ) ;
2012-12-10 23:05:59 +04:00
}
out :
kfree ( dir_buf ) ;
brelse ( iloc . bh ) ;
return ret ;
}
2022-06-30 12:01:00 +03:00
void * ext4_read_inline_link ( struct inode * inode )
{
struct ext4_iloc iloc ;
int ret , inline_size ;
void * link ;
ret = ext4_get_inode_loc ( inode , & iloc ) ;
if ( ret )
return ERR_PTR ( ret ) ;
ret = - ENOMEM ;
inline_size = ext4_get_inline_size ( inode ) ;
link = kmalloc ( inline_size + 1 , GFP_NOFS ) ;
if ( ! link )
goto out ;
ret = ext4_read_inline_data ( inode , link , inline_size , & iloc ) ;
if ( ret < 0 ) {
kfree ( link ) ;
goto out ;
}
nd_terminate_link ( link , inode - > i_size , ret ) ;
out :
if ( ret < 0 )
link = ERR_PTR ( ret ) ;
brelse ( iloc . bh ) ;
return link ;
}
2012-12-10 23:06:01 +04:00
/*
 * Look up the inode's on-disk location and hand back its buffer head.
 *
 * On success *@parent_de points at the start of the raw inode's i_block
 * area and *@retval is 0; the returned buffer head is not released here,
 * so the caller is expected to drop it.  On failure *@retval holds the
 * error from ext4_get_inode_loc() and NULL is returned.
 */
struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
					struct ext4_dir_entry_2 **parent_de,
					int *retval)
{
	struct ext4_iloc iloc;
	int err;

	err = ext4_get_inode_loc(inode, &iloc);
	*retval = err;
	if (err)
		return NULL;

	*parent_de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block;
	return iloc.bh;
}
2012-12-10 23:05:59 +04:00
/*
 * Try to set up inline data for a newly created directory.
 *
 * Returns 0 on success, otherwise the error.  On ENOSPC the caller should
 * fall back to creating the normal on-disk directory layout.
 */
int ext4_try_create_inline_dir(handle_t *handle, struct inode *parent,
			       struct inode *inode)
{
	int inline_size = EXT4_MIN_INLINE_DATA_SIZE;
	struct ext4_dir_entry_2 *dotdot, *filler;
	struct ext4_iloc iloc;
	int ret;

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret)
		return ret;

	ret = ext4_prepare_inline_data(handle, inode, inline_size);
	if (ret)
		goto release;

	/*
	 * An inline dir only stores the inode number for "..", followed by
	 * one unused (inode == 0) entry spanning the rest of the space.
	 */
	dotdot = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block;
	dotdot->inode = cpu_to_le32(parent->i_ino);

	filler = (struct ext4_dir_entry_2 *)
			((void *)dotdot + EXT4_INLINE_DOTDOT_SIZE);
	filler->inode = 0;
	filler->rec_len = ext4_rec_len_to_disk(
				inline_size - EXT4_INLINE_DOTDOT_SIZE,
				inline_size);

	set_nlink(inode, 2);
	EXT4_I(inode)->i_disksize = inline_size;
	inode->i_size = inline_size;

release:
	brelse(iloc.bh);
	return ret;
}
2012-12-10 23:06:00 +04:00
struct buffer_head * ext4_find_inline_entry ( struct inode * dir ,
2015-05-18 20:14:47 +03:00
struct ext4_filename * fname ,
2012-12-10 23:06:00 +04:00
struct ext4_dir_entry_2 * * res_dir ,
int * has_inline_data )
{
int ret ;
struct ext4_iloc iloc ;
void * inline_start ;
int inline_size ;
if ( ext4_get_inode_loc ( dir , & iloc ) )
return NULL ;
down_read ( & EXT4_I ( dir ) - > xattr_sem ) ;
if ( ! ext4_has_inline_data ( dir ) ) {
* has_inline_data = 0 ;
goto out ;
}
inline_start = ( void * ) ext4_raw_inode ( & iloc ) - > i_block +
EXT4_INLINE_DOTDOT_SIZE ;
inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE ;
2015-05-18 20:14:47 +03:00
ret = ext4_search_dir ( iloc . bh , inline_start , inline_size ,
2017-05-25 01:10:49 +03:00
dir , fname , 0 , res_dir ) ;
2012-12-10 23:06:00 +04:00
if ( ret = = 1 )
goto out_find ;
if ( ret < 0 )
goto out ;
if ( ext4_get_inline_size ( dir ) = = EXT4_MIN_INLINE_DATA_SIZE )
goto out ;
inline_start = ext4_get_inline_xattr_pos ( dir , & iloc ) ;
inline_size = ext4_get_inline_size ( dir ) - EXT4_MIN_INLINE_DATA_SIZE ;
2015-05-18 20:14:47 +03:00
ret = ext4_search_dir ( iloc . bh , inline_start , inline_size ,
2017-05-25 01:10:49 +03:00
dir , fname , 0 , res_dir ) ;
2012-12-10 23:06:00 +04:00
if ( ret = = 1 )
goto out_find ;
out :
brelse ( iloc . bh ) ;
iloc . bh = NULL ;
out_find :
up_read ( & EXT4_I ( dir ) - > xattr_sem ) ;
return iloc . bh ;
}
2012-12-10 23:06:00 +04:00
/*
 * Delete the entry @de_del from an inline directory.
 *
 * @bh is the buffer that gets journal write access and is passed to
 * ext4_generic_delete_entry().  The region handed to that helper is
 * chosen by where @de_del lies: inside i_block, or in the xattr area.
 * *@has_inline_data is set to 0 if @dir has no inline data.
 *
 * Returns 0 or a negative errno.  On the zero-error path the inode is
 * marked dirty and that call's result is returned.  Any result other
 * than -ENOENT is passed to ext4_std_error().
 */
int ext4_delete_inline_entry(handle_t *handle,
			     struct inode *dir,
			     struct ext4_dir_entry_2 *de_del,
			     struct buffer_head *bh,
			     int *has_inline_data)
{
	struct ext4_iloc iloc;
	void *inline_start;
	void *iblock;
	int inline_size;
	int no_expand;
	int err;

	err = ext4_get_inode_loc(dir, &iloc);
	if (err)
		return err;

	ext4_write_lock_xattr(dir, &no_expand);
	if (!ext4_has_inline_data(dir)) {
		*has_inline_data = 0;
		goto unlock;
	}

	iblock = (void *)ext4_raw_inode(&iloc)->i_block;
	if ((void *)de_del - iblock < EXT4_MIN_INLINE_DATA_SIZE) {
		/* Entry is in i_block: region starts after the ".." slot. */
		inline_start = iblock + EXT4_INLINE_DOTDOT_SIZE;
		inline_size = EXT4_MIN_INLINE_DATA_SIZE -
				EXT4_INLINE_DOTDOT_SIZE;
	} else {
		/* Otherwise it is in the xattr-backed part. */
		inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
		inline_size = ext4_get_inline_size(dir) -
				EXT4_MIN_INLINE_DATA_SIZE;
	}

	BUFFER_TRACE(bh, "get_write_access");
	err = ext4_journal_get_write_access(handle, dir->i_sb, bh,
					    EXT4_JTR_NONE);
	if (err)
		goto unlock;

	err = ext4_generic_delete_entry(dir, de_del, bh,
					inline_start, inline_size, 0);
	if (err)
		goto unlock;

	ext4_show_inline_dir(dir, iloc.bh, inline_start, inline_size);

unlock:
	ext4_write_unlock_xattr(dir, &no_expand);
	if (likely(err == 0))
		err = ext4_mark_inode_dirty(handle, dir);
	brelse(iloc.bh);
	if (err != -ENOENT)
		ext4_std_error(dir->i_sb, err);
	return err;
}
2012-12-10 23:06:01 +04:00
/*
* Get the inline dentry at offset .
*/
static inline struct ext4_dir_entry_2 *
ext4_get_inline_entry ( struct inode * inode ,
struct ext4_iloc * iloc ,
unsigned int offset ,
void * * inline_start ,
int * inline_size )
{
void * inline_pos ;
BUG_ON ( offset > ext4_get_inline_size ( inode ) ) ;
if ( offset < EXT4_MIN_INLINE_DATA_SIZE ) {
inline_pos = ( void * ) ext4_raw_inode ( iloc ) - > i_block ;
* inline_size = EXT4_MIN_INLINE_DATA_SIZE ;
} else {
inline_pos = ext4_get_inline_xattr_pos ( inode , iloc ) ;
offset - = EXT4_MIN_INLINE_DATA_SIZE ;
* inline_size = ext4_get_inline_size ( inode ) -
EXT4_MIN_INLINE_DATA_SIZE ;
}
if ( inline_start )
* inline_start = inline_pos ;
return ( struct ext4_dir_entry_2 * ) ( inline_pos + offset ) ;
}
2016-07-10 21:01:03 +03:00
bool empty_inline_dir ( struct inode * dir , int * has_inline_data )
2012-12-10 23:06:01 +04:00
{
int err , inline_size ;
struct ext4_iloc iloc ;
2018-08-27 16:22:45 +03:00
size_t inline_len ;
2012-12-10 23:06:01 +04:00
void * inline_pos ;
unsigned int offset ;
struct ext4_dir_entry_2 * de ;
ext4: fix fs corruption when tring to remove a non-empty directory with IO error
We inject IO error when rmdir non empty direcory, then got issue as follows:
step1: mkfs.ext4 -F /dev/sda
step2: mount /dev/sda test
step3: cd test
step4: mkdir -p 1/2
step5: rmdir 1
[ 110.920551] ext4_empty_dir: inject fault
[ 110.921926] EXT4-fs warning (device sda): ext4_rmdir:3113: inode #12:
comm rmdir: empty directory '1' has too many links (3)
step6: cd ..
step7: umount test
step8: fsck.ext4 -f /dev/sda
e2fsck 1.42.9 (28-Dec-2013)
Pass 1: Checking inodes, blocks, and sizes
Pass 2: Checking directory structure
Entry '..' in .../??? (13) has deleted/unused inode 12. Clear<y>? yes
Pass 3: Checking directory connectivity
Unconnected directory inode 13 (...)
Connect to /lost+found<y>? yes
Pass 4: Checking reference counts
Inode 13 ref count is 3, should be 2. Fix<y>? yes
Pass 5: Checking group summary information
/dev/sda: ***** FILE SYSTEM WAS MODIFIED *****
/dev/sda: 12/131072 files (0.0% non-contiguous), 26157/524288 blocks
ext4_rmdir
if (!ext4_empty_dir(inode))
goto end_rmdir;
ext4_empty_dir
bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE);
if (IS_ERR(bh))
return true;
Now if read directory block failed, 'ext4_empty_dir' will return true, assume
directory is empty. Obviously, it will lead to above issue.
To solve this issue, if read directory block failed 'ext4_empty_dir' just
return false. To avoid making things worse when file system is already
corrupted, 'ext4_empty_dir' also return false.
Signed-off-by: Ye Bin <yebin10@huawei.com>
Cc: stable@kernel.org
Link: https://lore.kernel.org/r/20220228024815.3952506-1-yebin10@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
2022-02-28 05:48:15 +03:00
bool ret = false ;
2012-12-10 23:06:01 +04:00
err = ext4_get_inode_loc ( dir , & iloc ) ;
if ( err ) {
2020-03-29 02:33:43 +03:00
EXT4_ERROR_INODE_ERR ( dir , - err ,
" error %d getting inode %lu block " ,
err , dir - > i_ino ) ;
ext4: fix fs corruption when tring to remove a non-empty directory with IO error
We inject IO error when rmdir non empty direcory, then got issue as follows:
step1: mkfs.ext4 -F /dev/sda
step2: mount /dev/sda test
step3: cd test
step4: mkdir -p 1/2
step5: rmdir 1
[ 110.920551] ext4_empty_dir: inject fault
[ 110.921926] EXT4-fs warning (device sda): ext4_rmdir:3113: inode #12:
comm rmdir: empty directory '1' has too many links (3)
step6: cd ..
step7: umount test
step8: fsck.ext4 -f /dev/sda
e2fsck 1.42.9 (28-Dec-2013)
Pass 1: Checking inodes, blocks, and sizes
Pass 2: Checking directory structure
Entry '..' in .../??? (13) has deleted/unused inode 12. Clear<y>? yes
Pass 3: Checking directory connectivity
Unconnected directory inode 13 (...)
Connect to /lost+found<y>? yes
Pass 4: Checking reference counts
Inode 13 ref count is 3, should be 2. Fix<y>? yes
Pass 5: Checking group summary information
/dev/sda: ***** FILE SYSTEM WAS MODIFIED *****
/dev/sda: 12/131072 files (0.0% non-contiguous), 26157/524288 blocks
ext4_rmdir
if (!ext4_empty_dir(inode))
goto end_rmdir;
ext4_empty_dir
bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE);
if (IS_ERR(bh))
return true;
Now if read directory block failed, 'ext4_empty_dir' will return true, assume
directory is empty. Obviously, it will lead to above issue.
To solve this issue, if read directory block failed 'ext4_empty_dir' just
return false. To avoid making things worse when file system is already
corrupted, 'ext4_empty_dir' also return false.
Signed-off-by: Ye Bin <yebin10@huawei.com>
Cc: stable@kernel.org
Link: https://lore.kernel.org/r/20220228024815.3952506-1-yebin10@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
2022-02-28 05:48:15 +03:00
return false ;
2012-12-10 23:06:01 +04:00
}
down_read ( & EXT4_I ( dir ) - > xattr_sem ) ;
if ( ! ext4_has_inline_data ( dir ) ) {
* has_inline_data = 0 ;
ext4: fix fs corruption when tring to remove a non-empty directory with IO error
We inject IO error when rmdir non empty direcory, then got issue as follows:
step1: mkfs.ext4 -F /dev/sda
step2: mount /dev/sda test
step3: cd test
step4: mkdir -p 1/2
step5: rmdir 1
[ 110.920551] ext4_empty_dir: inject fault
[ 110.921926] EXT4-fs warning (device sda): ext4_rmdir:3113: inode #12:
comm rmdir: empty directory '1' has too many links (3)
step6: cd ..
step7: umount test
step8: fsck.ext4 -f /dev/sda
e2fsck 1.42.9 (28-Dec-2013)
Pass 1: Checking inodes, blocks, and sizes
Pass 2: Checking directory structure
Entry '..' in .../??? (13) has deleted/unused inode 12. Clear<y>? yes
Pass 3: Checking directory connectivity
Unconnected directory inode 13 (...)
Connect to /lost+found<y>? yes
Pass 4: Checking reference counts
Inode 13 ref count is 3, should be 2. Fix<y>? yes
Pass 5: Checking group summary information
/dev/sda: ***** FILE SYSTEM WAS MODIFIED *****
/dev/sda: 12/131072 files (0.0% non-contiguous), 26157/524288 blocks
ext4_rmdir
if (!ext4_empty_dir(inode))
goto end_rmdir;
ext4_empty_dir
bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE);
if (IS_ERR(bh))
return true;
Now if read directory block failed, 'ext4_empty_dir' will return true, assume
directory is empty. Obviously, it will lead to above issue.
To solve this issue, if read directory block failed 'ext4_empty_dir' just
return false. To avoid making things worse when file system is already
corrupted, 'ext4_empty_dir' also return false.
Signed-off-by: Ye Bin <yebin10@huawei.com>
Cc: stable@kernel.org
Link: https://lore.kernel.org/r/20220228024815.3952506-1-yebin10@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
2022-02-28 05:48:15 +03:00
ret = true ;
2012-12-10 23:06:01 +04:00
goto out ;
}
de = ( struct ext4_dir_entry_2 * ) ext4_raw_inode ( & iloc ) - > i_block ;
if ( ! le32_to_cpu ( de - > inode ) ) {
ext4_warning ( dir - > i_sb ,
" bad inline directory (dir #%lu) - no `..' " ,
dir - > i_ino ) ;
goto out ;
}
2018-08-27 16:22:45 +03:00
inline_len = ext4_get_inline_size ( dir ) ;
2012-12-10 23:06:01 +04:00
offset = EXT4_INLINE_DOTDOT_SIZE ;
2018-08-27 16:22:45 +03:00
while ( offset < inline_len ) {
2012-12-10 23:06:01 +04:00
de = ext4_get_inline_entry ( dir , & iloc , offset ,
& inline_pos , & inline_size ) ;
if ( ext4_check_dir_entry ( dir , NULL , de ,
iloc . bh , inline_pos ,
inline_size , offset ) ) {
ext4_warning ( dir - > i_sb ,
" bad inline directory (dir #%lu) - "
" inode %u, rec_len %u, name_len %d "
2016-04-27 08:11:21 +03:00
" inline size %d " ,
2012-12-10 23:06:01 +04:00
dir - > i_ino , le32_to_cpu ( de - > inode ) ,
le16_to_cpu ( de - > rec_len ) , de - > name_len ,
inline_size ) ;
goto out ;
}
if ( le32_to_cpu ( de - > inode ) ) {
goto out ;
}
offset + = ext4_rec_len_from_disk ( de - > rec_len , inline_size ) ;
}
ext4: fix fs corruption when tring to remove a non-empty directory with IO error
We inject IO error when rmdir non empty direcory, then got issue as follows:
step1: mkfs.ext4 -F /dev/sda
step2: mount /dev/sda test
step3: cd test
step4: mkdir -p 1/2
step5: rmdir 1
[ 110.920551] ext4_empty_dir: inject fault
[ 110.921926] EXT4-fs warning (device sda): ext4_rmdir:3113: inode #12:
comm rmdir: empty directory '1' has too many links (3)
step6: cd ..
step7: umount test
step8: fsck.ext4 -f /dev/sda
e2fsck 1.42.9 (28-Dec-2013)
Pass 1: Checking inodes, blocks, and sizes
Pass 2: Checking directory structure
Entry '..' in .../??? (13) has deleted/unused inode 12. Clear<y>? yes
Pass 3: Checking directory connectivity
Unconnected directory inode 13 (...)
Connect to /lost+found<y>? yes
Pass 4: Checking reference counts
Inode 13 ref count is 3, should be 2. Fix<y>? yes
Pass 5: Checking group summary information
/dev/sda: ***** FILE SYSTEM WAS MODIFIED *****
/dev/sda: 12/131072 files (0.0% non-contiguous), 26157/524288 blocks
ext4_rmdir
if (!ext4_empty_dir(inode))
goto end_rmdir;
ext4_empty_dir
bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE);
if (IS_ERR(bh))
return true;
Now if read directory block failed, 'ext4_empty_dir' will return true, assume
directory is empty. Obviously, it will lead to above issue.
To solve this issue, if read directory block failed 'ext4_empty_dir' just
return false. To avoid making things worse when file system is already
corrupted, 'ext4_empty_dir' also return false.
Signed-off-by: Ye Bin <yebin10@huawei.com>
Cc: stable@kernel.org
Link: https://lore.kernel.org/r/20220228024815.3952506-1-yebin10@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
2022-02-28 05:48:15 +03:00
ret = true ;
2012-12-10 23:06:01 +04:00
out :
up_read ( & EXT4_I ( dir ) - > xattr_sem ) ;
brelse ( iloc . bh ) ;
return ret ;
}
2012-12-10 23:04:46 +04:00
/*
 * Locked wrapper around ext4_destroy_inline_data_nolock(): takes the
 * xattr write lock, calls the helper, drops the lock and returns the
 * helper's result.
 */
int ext4_destroy_inline_data(handle_t *handle, struct inode *inode)
{
	int no_expand;
	int err;

	ext4_write_lock_xattr(inode, &no_expand);
	err = ext4_destroy_inline_data_nolock(handle, inode);
	ext4_write_unlock_xattr(inode, &no_expand);

	return err;
}
2012-12-10 23:06:02 +04:00
2017-10-02 00:57:54 +03:00
int ext4_inline_data_iomap ( struct inode * inode , struct iomap * iomap )
{
__u64 addr ;
int error = - EAGAIN ;
struct ext4_iloc iloc ;
down_read ( & EXT4_I ( inode ) - > xattr_sem ) ;
if ( ! ext4_has_inline_data ( inode ) )
goto out ;
error = ext4_get_inode_loc ( inode , & iloc ) ;
if ( error )
goto out ;
addr = ( __u64 ) iloc . bh - > b_blocknr < < inode - > i_sb - > s_blocksize_bits ;
addr + = ( char * ) ext4_raw_inode ( & iloc ) - iloc . bh - > b_data ;
addr + = offsetof ( struct ext4_inode , i_block ) ;
brelse ( iloc . bh ) ;
iomap - > addr = addr ;
iomap - > offset = 0 ;
iomap - > length = min_t ( loff_t , ext4_get_inline_size ( inode ) ,
i_size_read ( inode ) ) ;
2018-06-01 19:03:06 +03:00
iomap - > type = IOMAP_INLINE ;
iomap - > flags = 0 ;
2017-10-02 00:57:54 +03:00
out :
up_read ( & EXT4_I ( inode ) - > xattr_sem ) ;
return error ;
}
2017-01-23 03:35:49 +03:00
int ext4_inline_data_truncate ( struct inode * inode , int * has_inline )
2012-12-10 23:06:02 +04:00
{
handle_t * handle ;
2017-01-23 03:35:49 +03:00
int inline_size , value_len , needed_blocks , no_expand , err = 0 ;
2012-12-10 23:06:02 +04:00
size_t i_size ;
void * value = NULL ;
struct ext4_xattr_ibody_find is = {
. s = { . not_found = - ENODATA , } ,
} ;
struct ext4_xattr_info i = {
. name_index = EXT4_XATTR_INDEX_SYSTEM ,
. name = EXT4_XATTR_SYSTEM_DATA ,
} ;
needed_blocks = ext4_writepage_trans_blocks ( inode ) ;
2013-02-09 06:59:22 +04:00
handle = ext4_journal_start ( inode , EXT4_HT_INODE , needed_blocks ) ;
2012-12-10 23:06:02 +04:00
if ( IS_ERR ( handle ) )
2017-01-23 03:35:49 +03:00
return PTR_ERR ( handle ) ;
2012-12-10 23:06:02 +04:00
2017-01-12 05:50:46 +03:00
ext4_write_lock_xattr ( inode , & no_expand ) ;
2012-12-10 23:06:02 +04:00
if ( ! ext4_has_inline_data ( inode ) ) {
2020-11-03 05:29:02 +03:00
ext4_write_unlock_xattr ( inode , & no_expand ) ;
2012-12-10 23:06:02 +04:00
* has_inline = 0 ;
ext4_journal_stop ( handle ) ;
2017-01-23 03:35:49 +03:00
return 0 ;
2012-12-10 23:06:02 +04:00
}
2017-01-23 03:35:49 +03:00
if ( ( err = ext4_orphan_add ( handle , inode ) ) ! = 0 )
2012-12-10 23:06:02 +04:00
goto out ;
2017-01-23 03:35:49 +03:00
if ( ( err = ext4_get_inode_loc ( inode , & is . iloc ) ) ! = 0 )
2012-12-10 23:06:02 +04:00
goto out ;
down_write ( & EXT4_I ( inode ) - > i_data_sem ) ;
i_size = inode - > i_size ;
inline_size = ext4_get_inline_size ( inode ) ;
EXT4_I ( inode ) - > i_disksize = i_size ;
if ( i_size < inline_size ) {
2021-08-19 17:49:26 +03:00
/*
* if there ' s inline data to truncate and this file was
* converted to extents after that inline data was written ,
* the extent status cache must be cleared to avoid leaving
* behind stale delayed allocated extent entries
*/
if ( ! ext4_test_inode_state ( inode , EXT4_STATE_MAY_INLINE_DATA ) ) {
retry :
err = ext4_es_remove_extent ( inode , 0 , EXT_MAX_BLOCKS ) ;
if ( err = = - ENOMEM ) {
mm: introduce memalloc_retry_wait()
Various places in the kernel - largely in filesystems - respond to a
memory allocation failure by looping around and re-trying. Some of
these cannot conveniently use __GFP_NOFAIL, for reasons such as:
- a GFP_ATOMIC allocation, which __GFP_NOFAIL doesn't work on
- a need to check for the process being signalled between failures
- the possibility that other recovery actions could be performed
- the allocation is quite deep in support code, and passing down an
extra flag to say if __GFP_NOFAIL is wanted would be clumsy.
Many of these currently use congestion_wait() which (in almost all
cases) simply waits the given timeout - congestion isn't tracked for
most devices.
It isn't clear what the best delay is for loops, but it is clear that
the various filesystems shouldn't be responsible for choosing a timeout.
This patch introduces memalloc_retry_wait() with takes on that
responsibility. Code that wants to retry a memory allocation can call
this function passing the GFP flags that were used. It will wait
however is appropriate.
For now, it only considers __GFP_NORETRY and whatever
gfpflags_allow_blocking() tests. If blocking is allowed without
__GFP_NORETRY, then alloc_page either made some reclaim progress, or
waited for a while, before failing. So there is no need for much
further waiting. memalloc_retry_wait() will wait until the current
jiffie ends. If this condition is not met, then alloc_page() won't have
waited much if at all. In that case memalloc_retry_wait() waits about
200ms. This is the delay that most current loops uses.
linux/sched/mm.h needs to be included in some files now,
but linux/backing-dev.h does not.
Link: https://lkml.kernel.org/r/163754371968.13692.1277530886009912421@noble.neil.brown.name
Signed-off-by: NeilBrown <neilb@suse.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Jaegeuk Kim <jaegeuk@kernel.org>
Cc: Chao Yu <chao@kernel.org>
Cc: Darrick J. Wong <djwong@kernel.org>
Cc: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2022-01-15 01:07:14 +03:00
memalloc_retry_wait ( GFP_ATOMIC ) ;
2021-08-19 17:49:26 +03:00
goto retry ;
}
if ( err )
goto out_error ;
}
2012-12-10 23:06:02 +04:00
/* Clear the content in the xattr space. */
if ( inline_size > EXT4_MIN_INLINE_DATA_SIZE ) {
2017-01-23 03:35:49 +03:00
if ( ( err = ext4_xattr_ibody_find ( inode , & i , & is ) ) ! = 0 )
2012-12-10 23:06:02 +04:00
goto out_error ;
BUG_ON ( is . s . not_found ) ;
value_len = le32_to_cpu ( is . s . here - > e_value_size ) ;
value = kmalloc ( value_len , GFP_NOFS ) ;
2017-01-23 03:35:49 +03:00
if ( ! value ) {
err = - ENOMEM ;
2012-12-10 23:06:02 +04:00
goto out_error ;
2017-01-23 03:35:49 +03:00
}
2012-12-10 23:06:02 +04:00
2017-01-23 03:35:49 +03:00
err = ext4_xattr_ibody_get ( inode , i . name_index ,
i . name , value , value_len ) ;
if ( err < = 0 )
2012-12-10 23:06:02 +04:00
goto out_error ;
i . value = value ;
i . value_len = i_size > EXT4_MIN_INLINE_DATA_SIZE ?
i_size - EXT4_MIN_INLINE_DATA_SIZE : 0 ;
2021-06-03 05:03:02 +03:00
err = ext4_xattr_ibody_set ( handle , inode , & i , & is ) ;
2017-01-23 03:35:49 +03:00
if ( err )
2012-12-10 23:06:02 +04:00
goto out_error ;
}
/* Clear the content within i_blocks. */
2014-01-07 21:58:19 +04:00
if ( i_size < EXT4_MIN_INLINE_DATA_SIZE ) {
void * p = ( void * ) ext4_raw_inode ( & is . iloc ) - > i_block ;
memset ( p + i_size , 0 ,
EXT4_MIN_INLINE_DATA_SIZE - i_size ) ;
}
2012-12-10 23:06:02 +04:00
EXT4_I ( inode ) - > i_inline_size = i_size <
EXT4_MIN_INLINE_DATA_SIZE ?
EXT4_MIN_INLINE_DATA_SIZE : i_size ;
}
out_error :
up_write ( & EXT4_I ( inode ) - > i_data_sem ) ;
out :
brelse ( is . iloc . bh ) ;
2017-01-12 05:50:46 +03:00
ext4_write_unlock_xattr ( inode , & no_expand ) ;
2012-12-10 23:06:02 +04:00
kfree ( value ) ;
if ( inode - > i_nlink )
ext4_orphan_del ( handle , inode ) ;
2017-01-23 03:35:49 +03:00
if ( err = = 0 ) {
inode - > i_mtime = inode - > i_ctime = current_time ( inode ) ;
err = ext4_mark_inode_dirty ( handle , inode ) ;
if ( IS_SYNC ( inode ) )
ext4_handle_sync ( handle ) ;
}
2012-12-10 23:06:02 +04:00
ext4_journal_stop ( handle ) ;
2017-01-23 03:35:49 +03:00
return err ;
2012-12-10 23:06:02 +04:00
}
2012-12-10 23:06:03 +04:00
int ext4_convert_inline_data ( struct inode * inode )
{
2017-01-12 05:50:46 +03:00
int error , needed_blocks , no_expand ;
2012-12-10 23:06:03 +04:00
handle_t * handle ;
struct ext4_iloc iloc ;
if ( ! ext4_has_inline_data ( inode ) ) {
ext4_clear_inode_state ( inode , EXT4_STATE_MAY_INLINE_DATA ) ;
return 0 ;
2022-05-16 15:26:34 +03:00
} else if ( ! ext4_test_inode_state ( inode , EXT4_STATE_MAY_INLINE_DATA ) ) {
/*
* Inode has inline data but EXT4_STATE_MAY_INLINE_DATA is
* cleared . This means we are in the middle of moving of
* inline data to delay allocated block . Just force writeout
* here to finish conversion .
*/
error = filemap_flush ( inode - > i_mapping ) ;
if ( error )
return error ;
if ( ! ext4_has_inline_data ( inode ) )
return 0 ;
2012-12-10 23:06:03 +04:00
}
needed_blocks = ext4_writepage_trans_blocks ( inode ) ;
iloc . bh = NULL ;
error = ext4_get_inode_loc ( inode , & iloc ) ;
if ( error )
return error ;
2013-02-09 06:59:22 +04:00
handle = ext4_journal_start ( inode , EXT4_HT_WRITE_PAGE , needed_blocks ) ;
2012-12-10 23:06:03 +04:00
if ( IS_ERR ( handle ) ) {
error = PTR_ERR ( handle ) ;
goto out_free ;
}
2017-01-12 05:50:46 +03:00
ext4_write_lock_xattr ( inode , & no_expand ) ;
if ( ext4_has_inline_data ( inode ) )
error = ext4_convert_inline_data_nolock ( handle , inode , & iloc ) ;
ext4_write_unlock_xattr ( inode , & no_expand ) ;
2012-12-10 23:06:03 +04:00
ext4_journal_stop ( handle ) ;
out_free :
brelse ( iloc . bh ) ;
return error ;
}