blob: 4dcc0581999881063ece0e2f6d745b7d52da2a5b [file] [log] [blame]
/*
* Copyright (C) International Business Machines Corp., 2000-2004
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* jfs_dtree.c: directory B+-tree manager
*
* B+-tree with variable length key directory:
*
* each directory page is structured as an array of 32-byte
* directory entry slots initialized as a freelist
* to avoid search/compaction of free space at insertion.
* when an entry is inserted, a number of slots are allocated
* from the freelist as required to store variable length data
* of the entry; when the entry is deleted, slots of the entry
* are returned to freelist.
*
* leaf entry stores full name as key and file serial number
* (aka inode number) as data.
* internal/router entry stores suffix compressed name
* as key and simple extent descriptor as data.
*
* each directory page maintains a sorted entry index table
* which stores the start slot index of sorted entries
* to allow binary search on the table.
*
* directory starts as a root/leaf page in on-disk inode
* inline data area.
* when it becomes full, it starts a leaf of an external extent
* of length of 1 block. each time the first leaf becomes full,
* it is extended rather than split (its size is doubled),
* until its length becomes 4 KBytes; from then on the extent is split
* with a new 4 KByte extent when it becomes full
* to reduce external fragmentation of small directories.
*
* a directory index table (see add_index()) gives each entry a
* persistent index, for linear scan of directory in pieces by
* readdir().
*
*
* case-insensitive directory file system
*
* names are stored in case-sensitive way in leaf entry.
* but stored, searched and compared in case-insensitive (uppercase) order
* (i.e., both search key and entry key are folded for search/compare):
* (note that case-sensitive order is BROKEN in storage, e.g.,
* sensitive: Ad, aB, aC, aD -> insensitive: aB, aC, aD, Ad
*
* entries which fold to the same key make up an equivalence class
* whose members are stored as contiguous cluster (may cross page boundary)
* but whose order is arbitrary and acts as duplicate, e.g.,
* abc, Abc, aBc, abC)
*
* once match is found at leaf, requires scan forward/backward
* either for, in case-insensitive search, duplicate
* or for, in case-sensitive search, for exact match
*
* router entry must be created/stored in case-insensitive way
* in internal entry:
* (right most key of left page and left most key of right page
* are folded, and its suffix compression is propagated as router
* key in parent)
* (e.g., if split occurs <abc> and <aBd>, <ABD> rather than <aB>
* should be made the router key for the split)
*
* case-insensitive search:
*
* fold search key;
*
* case-insensitive search of B-tree:
* for internal entry, router key is already folded;
* for leaf entry, fold the entry key before comparison.
*
* if (leaf entry case-insensitive match found)
* if (next entry satisfies case-insensitive match)
* return EDUPLICATE;
* if (prev entry satisfies case-insensitive match)
* return EDUPLICATE;
* return match;
* else
* return no match;
*
* serialization:
* target directory inode lock is being held on entry/exit
* of all main directory service routines.
*
* log based recovery:
*/
#include <linux/fs.h>
#include <linux/quotaops.h>
#include "jfs_incore.h"
#include "jfs_superblock.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_dmap.h"
#include "jfs_unicode.h"
#include "jfs_debug.h"
/*
 * dtree split parameter
 *
 * Describes an insertion that does not fit in its target page. It is
 * filled in by dtInsert() (leaf) or dtSplitUp() (parent) and handed to
 * the split/extend routines.
 */
struct dtsplit {
struct metapage *mp;	/* pinned metapage of the page to be split */
s16 index;	/* index at which the new entry is inserted */
s16 nslot;	/* number of slots the new entry needs */
struct component_name *key;	/* key of the entry being inserted */
ddata_t *data;	/* data of the entry being inserted */
struct pxdlist *pxdlist;	/* extents pre-allocated for the split */
};
/*
 * dtree flavour of BT_PAGE: supplies dtpage_t as the page type and
 * i_dtroot as the in-inode root field. Used below in the form
 * "sp = DT_PAGE(ip, smp)" to get the dtpage_t for a metapage.
 */
#define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot)
/*
 * get page buffer for specified block address
 *
 * Wraps BT_GETPAGE for dtree pages. When BT_GETPAGE leaves RC == 0 the
 * page header is sanity-checked:
 *  - nextindex must not exceed DTROOTMAXSLOT for the root (BN == 0),
 *    or the page's own maxslot for any other page;
 *  - a non-root page's maxslot must not exceed DTPAGEMAXSLOT.
 * If a check fails the page is released with BT_PUTPAGE, the problem is
 * reported through jfs_error(), MP is set to NULL and RC to -EIO.
 *
 * NOTE(review): this expands to a bare { } block rather than
 * do { } while (0) and evaluates its arguments more than once, so MP
 * and RC must be plain lvalues without side effects.
 */
#define DT_GETPAGE(IP, BN, MP, SIZE, P, RC)\
{\
BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot)\
if (!(RC))\
{\
if (((P)->header.nextindex > (((BN)==0)?DTROOTMAXSLOT:(P)->header.maxslot)) ||\
((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT)))\
{\
BT_PUTPAGE(MP);\
jfs_error((IP)->i_sb, "DT_GETPAGE: dtree page corrupt");\
MP = NULL;\
RC = -EIO;\
}\
}\
}
/*
 * for consistency: dtree alias of BT_PUTPAGE, used throughout this file
 * to unpin a page obtained with DT_GETPAGE/DT_GETSEARCH
 */
#define DT_PUTPAGE(MP) BT_PUTPAGE(MP)
/*
 * dtree flavour of BT_GETSEARCH (dtpage_t pages, i_dtroot root field).
 * dtInsert() uses it to pull the block number, metapage, page pointer
 * and index that dtSearch() saved in the top btstack frame.
 */
#define DT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
BT_GETSEARCH(IP, LEAF, BN, MP, dtpage_t, P, INDEX, i_dtroot)
/*
 * forward references
 *
 * Prototypes for the file-local helpers, so that the routines below can
 * call them before their definitions appear.
 */
/* page split / extend / delete propagation */
static int dtSplitUp(tid_t tid, struct inode *ip,
struct dtsplit * split, struct btstack * btstack);
static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rxdp);
static int dtExtendPage(tid_t tid, struct inode *ip,
struct dtsplit * split, struct btstack * btstack);
static int dtSplitRoot(tid_t tid, struct inode *ip,
struct dtsplit * split, struct metapage ** rmpp);
static int dtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
dtpage_t * fp, struct btstack * btstack);
static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p);
/* sequential traversal */
static int dtReadFirst(struct inode *ip, struct btstack * btstack);
static int dtReadNext(struct inode *ip,
loff_t * offset, struct btstack * btstack);
/* key comparison and extraction */
static int dtCompare(struct component_name * key, dtpage_t * p, int si);
static int ciCompare(struct component_name * key, dtpage_t * p, int si,
int flag);
static void dtGetKey(dtpage_t * p, int i, struct component_name * key,
int flag);
static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp,
int ri, struct component_name * key, int flag);
/* in-page entry manipulation */
static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key,
ddata_t * data, struct dt_lock **);
static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp,
struct dt_lock ** sdtlock, struct dt_lock ** ddtlock,
int do_index);
static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock);
static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock);
static void dtLinelockFreelist(dtpage_t * p, int m, struct dt_lock ** dtlock);
/*
 * upper-case the name of a component_name via UniStrupr(). Callers in
 * this file discard the result and keep using the same key, so the fold
 * is presumably done in place -- confirm against UniStrupr().
 */
#define ciToUpper(c) UniStrupr((c)->name)
/*
 *	read_index_page()
 *
 * Read one page of a directory's index table.
 *
 * The logical block <blkno> of the index table is translated with
 * xtLookup() and the page is then read by its absolute address: having
 * metadata mapped into the directory inode's address space presents a
 * multitude of problems, so the mapping is done outside of the
 * *_metapage routines.
 *
 * return: the metapage from read_metapage(), or NULL if the lookup
 *	   fails or yields address 0.
 */
static struct metapage *read_index_page(struct inode *inode, s64 blkno)
{
	s64 paddr;
	s32 plen;
	int pflag;

	if (xtLookup(inode, blkno, 1, &pflag, &paddr, &plen, 1) != 0)
		return NULL;

	/* address 0 is treated as "no page there" */
	if (paddr == 0)
		return NULL;

	return read_metapage(inode, paddr, PSIZE, 1);
}
/*
 *	get_index_page()
 *
 * Same as read_index_page(), but gets a new page through
 * get_metapage() instead of reading it with read_metapage().
 *
 * return: the metapage from get_metapage(), or NULL if the lookup
 *	   fails or yields address 0.
 */
static struct metapage *get_index_page(struct inode *inode, s64 blkno)
{
	s64 paddr;
	s32 plen;
	int pflag;

	if (xtLookup(inode, blkno, 1, &pflag, &paddr, &plen, 1) != 0)
		return NULL;

	/* address 0 is treated as "no page there" */
	if (paddr == 0)
		return NULL;

	return get_metapage(inode, paddr, PSIZE, 1);
}
/*
 *	find_index()
 *
 * Locate the directory table slot for directory index <index>.
 *
 * parameter:
 *	ip	- directory inode
 *	index	- directory index; valid values are 2 .. next_index - 1
 *	mp	- in/out: metapage holding the index table page.  On entry
 *		  *mp must be NULL or a page obtained by an earlier call;
 *		  it is reused when *lblock matches the block needed,
 *		  otherwise it is released and the right page is read.
 *		  Set to NULL when the table is inline in the inode.
 *	lblock	- in/out: logical block number that *mp corresponds to;
 *		  only read when *mp is non-NULL on entry.
 *
 * return: pointer to the slot, or NULL if <index> is out of range or
 *	   the index table page cannot be read.
 *
 * A non-NULL *mp must be released by the caller.
 */
static struct dir_table_slot *find_index(struct inode *ip, u32 index,
					 struct metapage ** mp, s64 *lblock)
{
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	s64 blkno;
	s64 offset;
	int page_offset;
	struct dir_table_slot *slot;
	static int maxWarnings = 10;	/* rate-limits the warning below */

	if (index < 2) {
		if (maxWarnings) {
			jfs_warn("find_index called with index = %u", index);
			maxWarnings--;
		}
		return NULL;
	}

	if (index >= jfs_ip->next_index) {
		jfs_warn("find_index called with index >= next_index");
		return NULL;
	}

	if (jfs_dirtable_inline(ip)) {
		/*
		 * Inline directory table: slot lives in the in-core inode,
		 * index 2 is the first entry
		 */
		*mp = NULL;
		slot = &jfs_ip->i_dirtable[index - 2];
	} else {
		/* byte offset of the slot within the external table */
		offset = (index - 2) * sizeof(struct dir_table_slot);
		page_offset = offset & (PSIZE - 1);
		/*
		 * NOTE(review): the "+ 1" looks like it has no effect while
		 * the slot size divides PSIZE; kept as in the original.
		 */
		blkno = ((offset + 1) >> L2PSIZE) <<
		    JFS_SBI(ip->i_sb)->l2nbperpage;

		/* drop a cached page that is not the one we need */
		if (*mp && (*lblock != blkno)) {
			release_metapage(*mp);
			*mp = NULL;
		}
		if (!(*mp)) {
			*lblock = blkno;
			*mp = read_index_page(ip, blkno);
		}
		if (!(*mp)) {
			jfs_err("find_index: error reading directory table");
			return NULL;
		}

		slot =
		    (struct dir_table_slot *) ((char *) (*mp)->data +
					       page_offset);
	}
	return slot;
}
/*
 *	lock_index()
 *
 * Take a tlckDATA transaction lock on index table page <mp> and add a
 * one-slot line lock entry covering directory index <index>.
 */
static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp,
			      u32 index)
{
	struct tlock *tl = txLock(tid, ip, mp, tlckDATA);
	struct linelock *ll = (struct linelock *) tl->lock;
	struct lv *vec;

	/* current linelock is full: chain a new one */
	if (ll->index >= ll->maxcnt)
		ll = txLinelock(ll);

	vec = &ll->lv[ll->index];
	ll->index++;

	/*
	 * A linelock slot is twice the size of a directory table slot
	 * and there are 512 table entries per page: reduce the index to
	 * its position within the page, then halve it.
	 */
	vec->offset = ((index - 2) & 511) >> 1;
	vec->length = 1;
}
/*
 * add_index()
 *
 * Adds an entry to the directory index table. This is used to provide
 * each directory entry with a persistent index in which to resume
 * directory traversals
 *
 * parameter:
 *	tid	- transaction id
 *	ip	- directory inode (must satisfy DO_INDEX())
 *	bn	- block address recorded in the new slot via DTSaddress()
 *	slot	- slot number recorded in the new table entry
 *
 * return: the index assigned to the entry (taken from next_index), or
 *	   0 on failure, in which case next_index is restored.
 */
static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot)
{
struct super_block *sb = ip->i_sb;
struct jfs_sb_info *sbi = JFS_SBI(sb);
struct jfs_inode_info *jfs_ip = JFS_IP(ip);
u64 blkno;
struct dir_table_slot *dirtab_slot;
u32 index;
struct linelock *llck;
struct lv *lv;
struct metapage *mp;
s64 offset;
uint page_offset;
struct tlock *tlck;
s64 xaddr;
ASSERT(DO_INDEX(ip));
/* indexes below 2 are never handed out; repair a bad counter */
if (jfs_ip->next_index < 2) {
jfs_warn("add_index: next_index = %d. Resetting!",
jfs_ip->next_index);
jfs_ip->next_index = 2;
}
/* claim the next index; clean_up undoes this on failure */
index = jfs_ip->next_index++;
if (index <= MAX_INLINE_DIRTABLE_ENTRY) {
/*
* i_size reflects size of index table, or 8 bytes per entry.
*/
ip->i_size = (loff_t) (index - 1) << 3;
/*
* dir table fits inline within inode
*/
dirtab_slot = &jfs_ip->i_dirtable[index-2];
dirtab_slot->flag = DIR_INDEX_VALID;
dirtab_slot->slot = slot;
DTSaddress(dirtab_slot, bn);
/* inline table changed: flag the inode with COMMIT_Dirtable */
set_cflag(COMMIT_Dirtable, ip);
return index;
}
/* first index that no longer fits inline: convert to external table */
if (index == (MAX_INLINE_DIRTABLE_ENTRY + 1)) {
struct dir_table_slot temp_table[12];
/*
* It's time to move the inline table to an external
* page and begin to build the xtree
*/
if (DQUOT_ALLOC_BLOCK(ip, sbi->nbperpage))
goto clean_up;
if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) {
DQUOT_FREE_BLOCK(ip, sbi->nbperpage);
goto clean_up;
}
/*
* Save the table, we're going to overwrite it with the
* xtree root
*/
memcpy(temp_table, &jfs_ip->i_dirtable, sizeof(temp_table));
/*
* Initialize empty x-tree
*/
xtInitRoot(tid, ip);
/*
* Add the first block to the xtree
*/
if (xtInsert(tid, ip, 0, 0, sbi->nbperpage, &xaddr, 0)) {
/* This really shouldn't fail */
jfs_warn("add_index: xtInsert failed!");
/* put the inline table back and give back blocks and quota */
memcpy(&jfs_ip->i_dirtable, temp_table,
sizeof (temp_table));
dbFree(ip, xaddr, sbi->nbperpage);
DQUOT_FREE_BLOCK(ip, sbi->nbperpage);
goto clean_up;
}
ip->i_size = PSIZE;
mp = get_index_page(ip, 0);
if (!mp) {
jfs_err("add_index: get_metapage failed!");
/* undo the xtree and restore the inline table */
xtTruncate(tid, ip, 0, COMMIT_PWMAP);
memcpy(&jfs_ip->i_dirtable, temp_table,
sizeof (temp_table));
goto clean_up;
}
/* line-lock the start of the new page, where the old table goes */
tlck = txLock(tid, ip, mp, tlckDATA);
llck = (struct linelock *) & tlck->lock;
ASSERT(llck->index == 0);
lv = &llck->lv[0];
lv->offset = 0;
lv->length = 6; /* tlckDATA slot size is 16 bytes */
llck->index++;
/* copy the saved inline entries to the head of the external page */
memcpy(mp->data, temp_table, sizeof(temp_table));
mark_metapage_dirty(mp);
release_metapage(mp);
/*
* Logging is now directed by xtree tlocks
*/
clear_cflag(COMMIT_Dirtable, ip);
}
/* external table: byte offset of the slot, offset in page, page block */
offset = (index - 2) * sizeof(struct dir_table_slot);
page_offset = offset & (PSIZE - 1);
blkno = ((offset + 1) >> L2PSIZE) << sbi->l2nbperpage;
if (page_offset == 0) {
/*
* This will be the beginning of a new page
*/
xaddr = 0;
if (xtInsert(tid, ip, 0, blkno, sbi->nbperpage, &xaddr, 0)) {
jfs_warn("add_index: xtInsert failed!");
goto clean_up;
}
ip->i_size += PSIZE;
if ((mp = get_index_page(ip, blkno)))
memset(mp->data, 0, PSIZE); /* Just looks better */
else
xtTruncate(tid, ip, offset, COMMIT_PWMAP);
} else
mp = read_index_page(ip, blkno);
/* covers both the new-page and the existing-page case */
if (!mp) {
jfs_err("add_index: get/read_metapage failed!");
goto clean_up;
}
/* line-lock the slot, then fill it in */
lock_index(tid, ip, mp, index);
dirtab_slot =
(struct dir_table_slot *) ((char *) mp->data + page_offset);
dirtab_slot->flag = DIR_INDEX_VALID;
dirtab_slot->slot = slot;
DTSaddress(dirtab_slot, bn);
mark_metapage_dirty(mp);
release_metapage(mp);
return index;
clean_up:
/* give the claimed index back; 0 tells the caller no index was added */
jfs_ip->next_index--;
return 0;
}
/*
 *	free_index()
 *
 * Mark entry <index> of the directory index table as free and store
 * <next> in its addr2 field (little-endian).
 *
 * Does nothing if find_index() cannot locate the slot.
 */
static void free_index(tid_t tid, struct inode *ip, u32 index, u32 next)
{
	struct metapage *page = NULL;
	s64 blk;
	struct dir_table_slot *entry = find_index(ip, index, &page, &blk);

	if (entry == NULL)
		return;

	entry->flag = DIR_INDEX_FREE;
	entry->addr1 = 0;
	entry->slot = 0;
	entry->addr2 = cpu_to_le32(next);

	if (page == NULL) {
		/* inline table: flag the inode instead of locking a page */
		set_cflag(COMMIT_Dirtable, ip);
		return;
	}

	/* external table: line-lock the slot and release the page */
	lock_index(tid, ip, page, index);
	mark_metapage_dirty(page);
	release_metapage(page);
}
/*
 *	modify_index()
 *
 * Change the block address and slot number stored in entry <index> of
 * the directory index table.
 *
 * <mp> and <lblock> are passed straight to find_index(), which uses
 * them as an in/out page cache.  The page is marked dirty here but not
 * released; releasing *mp is left to the caller.
 *
 * Does nothing if find_index() cannot locate the slot.
 */
static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn,
			 int slot, struct metapage ** mp, s64 *lblock)
{
	struct dir_table_slot *entry = find_index(ip, index, mp, lblock);

	if (entry == NULL)
		return;

	DTSaddress(entry, bn);
	entry->slot = slot;

	if (*mp == NULL) {
		/* inline table: flag the inode instead of locking a page */
		set_cflag(COMMIT_Dirtable, ip);
		return;
	}

	lock_index(tid, ip, *mp, index);
	mark_metapage_dirty(*mp);
}
/*
 *	read_index()
 *
 * Copy directory table slot <index> into the caller's <dirtab_slot>.
 *
 * return: 0 on success, -EIO if find_index() cannot locate the slot.
 */
static int read_index(struct inode *ip, u32 index,
		      struct dir_table_slot * dirtab_slot)
{
	struct metapage *page = NULL;
	s64 blk;
	struct dir_table_slot *src = find_index(ip, index, &page, &blk);

	if (src == NULL)
		return -EIO;

	memcpy(dirtab_slot, src, sizeof(struct dir_table_slot));

	/* page is only set when the table is external */
	if (page != NULL)
		release_metapage(page);

	return 0;
}
/*
 *	dtSearch()
 *
 * function:
 *	Search for the entry with specified key
 *
 * parameter:
 *	ip	- directory inode
 *	key	- name to search for
 *	data	- in:  for JFS_REMOVE/JFS_RENAME, the inode number the
 *		       caller expects the entry to have;
 *		  out: inode number of the entry found, or 0 when the
 *		       search misses and an insert position is returned
 *	btstack	- traverse stack; reset here, receives the path from the
 *		  root and, in its top frame, the search result
 *	flag	- JFS_LOOKUP, JFS_CREATE, JFS_REMOVE, JFS_RENAME or
 *		  JFS_FINDDIR
 *
 * return: 0 - success.  For JFS_LOOKUP only *data is returned and no
 *	       page stays pinned; otherwise the search result is on the
 *	       stack and the leaf page is pinned;
 *	   -ENOMEM  - no memory for the folded copy of the key;
 *	   -EEXIST  - JFS_CREATE and the name already exists;
 *	   -ESTALE  - JFS_REMOVE/JFS_RENAME and the entry's inode number
 *		      differs from *data;
 *	   -ENOENT  - JFS_LOOKUP/JFS_REMOVE/JFS_RENAME and no such name;
 *	   -EIO     - I/O error or corrupt directory page.
 */
int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
	     struct btstack * btstack, int flag)
{
	int rc = 0;
	int cmp = 1;		/* init for empty page */
	s64 bn;
	struct metapage *mp;
	dtpage_t *p;
	s8 *stbl;
	int base, index, lim;
	struct btframe *btsp;
	pxd_t *pxd;
	int psize = 288;	/* initial in-line directory */
	ino_t inumber;
	struct component_name ciKey;
	struct super_block *sb = ip->i_sb;

	ciKey.name = kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), GFP_NOFS);
	if (!ciKey.name) {
		rc = -ENOMEM;
		goto dtSearch_Exit2;
	}

	/* uppercase search key for c-i directory */
	UniStrcpy(ciKey.name, key->name);
	ciKey.namlen = key->namlen;

	/* only uppercase if case-insensitive support is on */
	if ((JFS_SBI(sb)->mntflag & JFS_OS2) == JFS_OS2) {
		ciToUpper(&ciKey);
	}
	BT_CLR(btstack);	/* reset stack */

	/* init level count for max pages to split */
	btstack->nsplit = 1;

	/*
	 *	search down tree from root:
	 *
	 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
	 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
	 *
	 * if entry with search key K is not found
	 * internal page search find the entry with largest key Ki
	 * less than K which point to the child page to search;
	 * leaf page search find the entry with smallest key Kj
	 * greater than K so that the returned index is the position of
	 * the entry to be shifted right for insertion of new entry.
	 * for empty tree, search key is greater than any key of the tree.
	 *
	 * by convention, root bn = 0.
	 */
	for (bn = 0;;) {
		/* get/pin the page to search */
		DT_GETPAGE(ip, bn, mp, psize, p, rc);
		if (rc)
			goto dtSearch_Exit1;

		/* get sorted entry table of the page */
		stbl = DT_GETSTBL(p);

		/*
		 * binary search with search key K on the current page.
		 */
		for (base = 0, lim = p->header.nextindex; lim; lim >>= 1) {
			index = base + (lim >> 1);

			/*
			 * stbl entries are signed: a negative value can only
			 * come from a corrupt page and would index before
			 * p->slot[].
			 */
			if (stbl[index] < 0) {
				rc = -EIO;
				goto out;
			}

			if (p->header.flag & BT_LEAF) {
				/* uppercase leaf name to compare */
				cmp =
				    ciCompare(&ciKey, p, stbl[index],
					      JFS_SBI(sb)->mntflag);
			} else {
				/* router key is in uppercase */
				cmp = dtCompare(&ciKey, p, stbl[index]);
			}
			if (cmp == 0) {
				/*
				 *	search hit
				 */
				/* search hit - leaf page:
				 * return the entry found
				 */
				if (p->header.flag & BT_LEAF) {
					inumber = le32_to_cpu(
			((struct ldtentry *) & p->slot[stbl[index]])->inumber);

					/*
					 * search for JFS_LOOKUP
					 */
					if (flag == JFS_LOOKUP) {
						*data = inumber;
						rc = 0;
						goto out;
					}

					/*
					 * search for JFS_CREATE
					 */
					if (flag == JFS_CREATE) {
						*data = inumber;
						rc = -EEXIST;
						goto out;
					}

					/*
					 * search for JFS_REMOVE or JFS_RENAME
					 */
					if ((flag == JFS_REMOVE ||
					     flag == JFS_RENAME) &&
					    *data != inumber) {
						rc = -ESTALE;
						goto out;
					}

					/*
					 * JFS_REMOVE|JFS_FINDDIR|JFS_RENAME
					 */
					/* save search result; leaf stays pinned */
					*data = inumber;
					btsp = btstack->top;
					btsp->bn = bn;
					btsp->index = index;
					btsp->mp = mp;

					rc = 0;
					goto dtSearch_Exit1;
				}

				/* search hit - internal page:
				 * descend/search its child page
				 */
				goto getChild;
			}

			if (cmp > 0) {
				base = index + 1;
				--lim;
			}
		}

		/*
		 *	search miss
		 *
		 * base is the smallest index with key (Kj) greater than
		 * search key (K) and may be zero or (maxindex + 1) index.
		 */
		/*
		 * search miss - leaf page
		 *
		 * return location of entry (base) where new entry with
		 * search key K is to be inserted.
		 */
		if (p->header.flag & BT_LEAF) {
			/*
			 * search for JFS_LOOKUP, JFS_REMOVE, or JFS_RENAME
			 */
			if (flag == JFS_LOOKUP || flag == JFS_REMOVE ||
			    flag == JFS_RENAME) {
				rc = -ENOENT;
				goto out;
			}

			/*
			 * search for JFS_CREATE|JFS_FINDDIR:
			 *
			 * save search result; leaf stays pinned
			 */
			*data = 0;
			btsp = btstack->top;
			btsp->bn = bn;
			btsp->index = base;
			btsp->mp = mp;

			rc = 0;
			goto dtSearch_Exit1;
		}

		/*
		 *	search miss - internal page
		 *
		 * if base is non-zero, decrement base by one to get the parent
		 * entry of the child page to search.
		 */
		index = base ? base - 1 : base;

		/*
		 * go down to child page
		 */
	      getChild:
		/* update max. number of pages to split */
		if (BT_STACK_FULL(btstack)) {
			/* Something's corrupted, mark filesystem dirty so
			 * chkdsk will fix it.
			 */
			jfs_error(sb, "stack overrun in dtSearch!");
			BT_STACK_DUMP(btstack);
			rc = -EIO;
			goto out;
		}

		/*
		 * The loop above does not run for an internal page with no
		 * entries, so re-check the entry we are about to follow.
		 */
		if (stbl[index] < 0) {
			rc = -EIO;
			goto out;
		}
		btstack->nsplit++;

		/* push (bn, index) of the parent page/entry */
		BT_PUSH(btstack, bn, index);

		/* get the child page block number */
		pxd = (pxd_t *) & p->slot[stbl[index]];
		bn = addressPXD(pxd);
		psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize;

		/* unpin the parent page */
		DT_PUTPAGE(mp);
	}

      out:
	/* paths that do not hand the page back to the caller */
	DT_PUTPAGE(mp);

      dtSearch_Exit1:
	kfree(ciKey.name);

      dtSearch_Exit2:
	return rc;
}
/*
 *	dtInsert()
 *
 * function: insert an entry to directory tree
 *
 * parameter:
 *	tid	- transaction id
 *	ip	- directory inode
 *	name	- name of the new entry
 *	fsn	- pointer to the inode number stored as the entry's data
 *	btstack	- search result left by dtSearch(): leaf page pinned and
 *		  index at which to insert in the top frame
 *
 * return: 0 - success;
 *	   -EMLINK - indexed directory whose next_index reached DIREND;
 *	   errno - failure reported by dtSplitUp();
 *	   the leaf page is unpinned on return.
 */
int dtInsert(tid_t tid, struct inode *ip,
	 struct component_name * name, ino_t * fsn, struct btstack * btstack)
{
	struct metapage *leaf_mp;	/* meta-page buffer of the leaf */
	dtpage_t *leaf;			/* leaf page to insert into */
	s64 bn;
	int pos;			/* insertion index from dtSearch() */
	int nslots;			/* slots needed by the new entry */
	int first;			/* first stbl slot touched */
	struct dtsplit split;		/* split information */
	ddata_t data;
	struct dt_lock *dtlck;
	struct tlock *tlck;
	struct lv *lv;

	/*
	 *	retrieve search result
	 *
	 * n.b. the index may be (maxindex + 1) of a full page.
	 */
	DT_GETSEARCH(ip, btstack->top, bn, leaf_mp, leaf, pos);

	/*
	 *	build the data and slot count for the new key
	 */
	if (!DO_INDEX(ip)) {
		nslots = NDTLEAF_LEGACY(name->namlen);
		data.leaf.ip = NULL;	/* signifies legacy directory format */
	} else {
		if (JFS_IP(ip)->next_index == DIREND) {
			DT_PUTPAGE(leaf_mp);
			return -EMLINK;
		}
		nslots = NDTLEAF(name->namlen);
		data.leaf.tid = tid;
		data.leaf.ip = ip;
	}
	data.leaf.ino = *fsn;

	/*
	 *	not enough room in the leaf: extend/split it
	 *
	 * dtSplitUp() inserts the entry and unpins the leaf page.
	 */
	if (nslots > leaf->header.freecnt) {
		split.mp = leaf_mp;
		split.index = pos;
		split.nslot = nslots;
		split.key = name;
		split.data = &data;
		return dtSplitUp(tid, ip, &split, btstack);
	}

	/*
	 *	enough room: insert the entry into the leaf directly
	 */
	BT_MARK_DIRTY(leaf_mp, ip);

	/* acquire a transaction lock on the leaf page */
	tlck = txLock(tid, ip, leaf_mp, tlckDTREE | tlckENTRY);
	dtlck = (struct dt_lock *) & tlck->lock;
	ASSERT(dtlck->index == 0);

	/* linelock header */
	lv = & dtlck->lv[0];
	lv->offset = 0;
	lv->length = 1;
	dtlck->index++;

	dtInsertEntry(leaf, pos, name, &data, &dtlck);

	/* a root leaf gets no separate stbl linelock: all done */
	if (leaf->header.flag & BT_ROOT) {
		DT_PUTPAGE(leaf_mp);
		return 0;
	}

	/* linelock stbl of non-root leaf page, from pos to the last entry */
	if (dtlck->index >= dtlck->maxcnt)
		dtlck = (struct dt_lock *) txLinelock(dtlck);
	lv = & dtlck->lv[dtlck->index];
	first = pos >> L2DTSLOTSIZE;
	lv->offset = leaf->header.stblindex + first;
	lv->length =
	    ((leaf->header.nextindex - 1) >> L2DTSLOTSIZE) - first + 1;
	dtlck->index++;

	/* unpin the leaf page */
	DT_PUTPAGE(leaf_mp);

	return 0;
}
/*
 *	dtSplitUp()
 *
 * function: propagate insertion bottom up;
 *
 * Handles an insertion that does not fit in its leaf page, in one of
 * three ways:
 *  - the leaf is the root: split it into a new child page;
 *  - the leaf's extent is smaller than PSIZE: extend the extent;
 *  - otherwise: split the leaf and insert a router entry for the new
 *    right page into the parent, repeating up the stack while parents
 *    are full.
 *
 * parameter:
 *	tid	- transaction id
 *	ip	- directory inode
 *	split	- split descriptor; split->mp is the pinned page that is
 *		  full.  Reused for parent splits on the way up.
 *	btstack	- traverse stack left by dtSearch()
 *
 * return: 0 - success;
 *	   errno - failure;
 *	leaf page unpinned;
 */
static int dtSplitUp(tid_t tid,
	  struct inode *ip, struct dtsplit * split, struct btstack * btstack)
{
	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
	int rc = 0;
	struct metapage *smp;
	dtpage_t *sp;		/* split page */
	struct metapage *rmp;
	dtpage_t *rp;		/* new right page split from sp */
	pxd_t rpxd;		/* new right page extent descriptor */
	struct metapage *lmp;
	dtpage_t *lp;		/* left child page */
	int skip;		/* index of entry of insertion */
	struct btframe *parent;	/* parent page entry on traverse stack */
	s64 xaddr, nxaddr;
	int xlen, xsize;
	struct pxdlist pxdlist;
	pxd_t *pxd;
	struct component_name key = { 0, NULL };
	ddata_t *data = split->data;
	int n;
	struct dt_lock *dtlck;
	struct tlock *tlck;
	struct lv *lv;
	int quota_allocation = 0;

	/* get split page */
	smp = split->mp;
	sp = DT_PAGE(ip, smp);

	/* buffer for the router key computed during propagation */
	key.name = kmalloc((JFS_NAME_MAX + 2) * sizeof(wchar_t), GFP_NOFS);
	if (!key.name) {
		DT_PUTPAGE(smp);
		rc = -ENOMEM;
		goto dtSplitUp_Exit;
	}

	/*
	 *	split leaf page
	 *
	 * The split routines insert the new entry, and
	 * acquire txLock as appropriate.
	 */
	/*
	 *	split root leaf page:
	 */
	if (sp->header.flag & BT_ROOT) {
		/*
		 * allocate a single extent child page; use two blocks if
		 * one block would not hold the existing entries plus the
		 * new one
		 */
		xlen = 1;
		n = sbi->bsize >> L2DTSLOTSIZE;
		n -= (n + 31) >> L2DTSLOTSIZE;	/* stbl size */
		n -= DTROOTMAXSLOT - sp->header.freecnt; /* header + entries */
		if (n <= split->nslot)
			xlen++;
		if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr))) {
			DT_PUTPAGE(smp);
			goto freeKeyName;
		}

		pxdlist.maxnpxd = 1;
		pxdlist.npxd = 0;
		pxd = &pxdlist.pxd[0];
		PXDaddress(pxd, xaddr);
		PXDlength(pxd, xlen);
		split->pxdlist = &pxdlist;
		rc = dtSplitRoot(tid, ip, split, &rmp);

		if (rc)
			dbFree(ip, xaddr, xlen);
		else {
			DT_PUTPAGE(rmp);

			/*
			 * Only a successful split changes the directory
			 * size; on failure the extent was just freed.
			 */
			if (!DO_INDEX(ip))
				ip->i_size = xlen << sbi->l2bsize;
		}

		DT_PUTPAGE(smp);

		goto freeKeyName;
	}

	/*
	 *	extend first leaf page
	 *
	 * extend the 1st extent if less than buffer page size
	 * (dtExtendPage() returns leaf page unpinned)
	 */
	pxd = &sp->header.self;
	xlen = lengthPXD(pxd);
	xsize = xlen << sbi->l2bsize;
	if (xsize < PSIZE) {
		xaddr = addressPXD(pxd);
		n = xsize >> L2DTSLOTSIZE;
		n -= (n + 31) >> L2DTSLOTSIZE;	/* stbl size */
		/* n becomes the number of blocks to add: 3 * xlen or xlen */
		if ((n + sp->header.freecnt) <= split->nslot)
			n = xlen + (xlen << 1);
		else
			n = xlen;

		/* Allocate blocks to quota. */
		if (DQUOT_ALLOC_BLOCK(ip, n)) {
			rc = -EDQUOT;
			goto extendOut;
		}
		quota_allocation += n;

		if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen,
				    (s64) n, &nxaddr)))
			goto extendOut;

		pxdlist.maxnpxd = 1;
		pxdlist.npxd = 0;
		pxd = &pxdlist.pxd[0];
		PXDaddress(pxd, nxaddr);
		PXDlength(pxd, xlen + n);
		split->pxdlist = &pxdlist;
		if ((rc = dtExtendPage(tid, ip, split, btstack))) {
			nxaddr = addressPXD(pxd);
			if (xaddr != nxaddr) {
				/* free relocated extent */
				xlen = lengthPXD(pxd);
				dbFree(ip, nxaddr, (s64) xlen);
			} else {
				/* free extended delta */
				xlen = lengthPXD(pxd) - n;
				xaddr = addressPXD(pxd) + xlen;
				dbFree(ip, xaddr, (s64) n);
			}
		} else if (!DO_INDEX(ip))
			ip->i_size = lengthPXD(pxd) << sbi->l2bsize;

	      extendOut:
		DT_PUTPAGE(smp);
		goto freeKeyName;
	}

	/*
	 *	split leaf page <sp> into <sp> and a new right page <rp>.
	 *
	 * return <rp> pinned and its extent descriptor <rpxd>
	 */
	/*
	 * allocate new directory page extent and
	 * new index page(s) to cover page split(s)
	 *
	 * allocation hint: ?
	 */
	n = btstack->nsplit;
	pxdlist.maxnpxd = pxdlist.npxd = 0;
	xlen = sbi->nbperpage;
	for (pxd = pxdlist.pxd; n > 0; n--, pxd++) {
		if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr)) == 0) {
			PXDaddress(pxd, xaddr);
			PXDlength(pxd, xlen);
			pxdlist.maxnpxd++;
			continue;
		}

		DT_PUTPAGE(smp);

		/* undo allocation */
		goto splitOut;
	}

	split->pxdlist = &pxdlist;
	if ((rc = dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd))) {
		DT_PUTPAGE(smp);

		/* undo allocation */
		goto splitOut;
	}

	if (!DO_INDEX(ip))
		ip->i_size += PSIZE;

	/*
	 * propagate up the router entry for the leaf page just split
	 *
	 * insert a router entry for the new page into the parent page,
	 * propagate the insert/split up the tree by walking back the stack
	 * of (bn of parent page, index of child page entry in parent page)
	 * that were traversed during the search for the page that split.
	 *
	 * the propagation of insert/split up the tree stops if the root
	 * splits or the page inserted into doesn't have to split to hold
	 * the new entry.
	 *
	 * the parent entry for the split page remains the same, and
	 * a new entry is inserted at its right with the first key and
	 * block number of the new right page.
	 *
	 * There are a maximum of 4 pages pinned at any time:
	 * two children, left parent and right parent (when the parent splits).
	 * keep the child pages pinned while working on the parent.
	 * make sure that all pins are released at exit.
	 */
	while ((parent = BT_POP(btstack)) != NULL) {
		/* parent page specified by stack frame <parent> */

		/* keep current child pages (<lp>, <rp>) pinned */
		lmp = smp;
		lp = sp;

		/*
		 * insert router entry in parent for new right child page <rp>
		 */
		/* get the parent page <sp> */
		DT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
		if (rc) {
			DT_PUTPAGE(lmp);
			DT_PUTPAGE(rmp);
			goto splitOut;
		}

		/*
		 * The new key entry goes ONE AFTER the index of parent entry,
		 * because the split was to the right.
		 */
		skip = parent->index + 1;

		/*
		 * compute the key for the router entry
		 *
		 * key suffix compression:
		 * for internal pages that have leaf pages as children,
		 * retain only what's needed to distinguish between
		 * the new entry and the entry on the page to its left.
		 * If the keys compare equal, retain the entire key.
		 *
		 * note that compression is performed only at computing
		 * router key at the lowest internal level.
		 * further compression of the key between pairs of higher
		 * level internal pages loses too much information and
		 * the search may fail.
		 * (e.g., two adjacent leaf pages of {a, ..., x} {xx, ...,}
		 * results in two adjacent parent entries (a)(xx).
		 * if split occurs between these two entries, and
		 * if compression is applied, the router key of parent entry
		 * of right page (x) will divert search for x into right
		 * subtree and miss x in the left subtree.)
		 *
		 * the entire key must be retained for the next-to-leftmost
		 * internal key at any level of the tree, or search may fail
		 * (e.g., ?)
		 */
		switch (rp->header.flag & BT_TYPE) {
		case BT_LEAF:
			/*
			 * compute the length of prefix for suffix compression
			 * between last entry of left page and first entry
			 * of right page
			 */
			if ((sp->header.flag & BT_ROOT && skip > 1) ||
			    sp->header.prev != 0 || skip > 1) {
				/* compute uppercase router prefix key */
				rc = ciGetLeafPrefixKey(lp,
							lp->header.nextindex-1,
							rp, 0, &key,
							sbi->mntflag);
				if (rc) {
					DT_PUTPAGE(lmp);
					DT_PUTPAGE(rmp);
					DT_PUTPAGE(smp);
					goto splitOut;
				}
			} else {
				/* next to leftmost entry of
				   lowest internal level */

				/* compute uppercase router key */
				dtGetKey(rp, 0, &key, sbi->mntflag);
				key.name[key.namlen] = 0;

				if ((sbi->mntflag & JFS_OS2) == JFS_OS2)
					ciToUpper(&key);
			}

			n = NDTINTERNAL(key.namlen);
			break;

		case BT_INTERNAL:
			dtGetKey(rp, 0, &key, sbi->mntflag);
			n = NDTINTERNAL(key.namlen);
			break;

		default:
			/*
			 * The right page is neither leaf nor internal, so no
			 * router key or slot count was computed.  Fail here
			 * rather than insert an entry built from stale
			 * values.
			 */
			jfs_err("dtSplitUp(): UFO!");
			DT_PUTPAGE(lmp);
			DT_PUTPAGE(rmp);
			DT_PUTPAGE(smp);
			rc = -EIO;
			goto splitOut;
		}

		/* unpin left child page */
		DT_PUTPAGE(lmp);

		/*
		 * compute the data for the router entry
		 */
		data->xd = rpxd;	/* child page xd */

		/*
		 * parent page is full - split the parent page
		 */
		if (n > sp->header.freecnt) {
			/* init for parent page split */
			split->mp = smp;
			split->index = skip;	/* index at insert */
			split->nslot = n;
			split->key = &key;
			/* split->data = data; */

			/* unpin right child page */
			DT_PUTPAGE(rmp);

			/* The split routines insert the new entry,
			 * acquire txLock as appropriate.
			 * return <rp> pinned and its block number <rbn>.
			 */
			rc = (sp->header.flag & BT_ROOT) ?
			    dtSplitRoot(tid, ip, split, &rmp) :
			    dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd);
			if (rc) {
				DT_PUTPAGE(smp);
				goto splitOut;
			}

			/* smp and rmp are pinned */
		}
		/*
		 * parent page is not full - insert router entry in parent page
		 */
		else {
			BT_MARK_DIRTY(smp, ip);
			/*
			 * acquire a transaction lock on the parent page
			 */
			tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY);
			dtlck = (struct dt_lock *) & tlck->lock;
			ASSERT(dtlck->index == 0);
			lv = & dtlck->lv[0];

			/* linelock header */
			lv->offset = 0;
			lv->length = 1;
			dtlck->index++;

			/* linelock stbl of non-root parent page */
			if (!(sp->header.flag & BT_ROOT)) {
				lv++;
				n = skip >> L2DTSLOTSIZE;
				lv->offset = sp->header.stblindex + n;
				lv->length =
				    ((sp->header.nextindex -
				      1) >> L2DTSLOTSIZE) - n + 1;
				dtlck->index++;
			}

			dtInsertEntry(sp, skip, &key, data, &dtlck);

			/* exit propagate up */
			break;
		}
	}

	/* unpin current split and its right page */
	DT_PUTPAGE(smp);
	DT_PUTPAGE(rmp);

	/*
	 * free remaining extents allocated for split
	 * (entries below npxd have been consumed by the split routines)
	 */
      splitOut:
	n = pxdlist.npxd;
	pxd = &pxdlist.pxd[n];
	for (; n < pxdlist.maxnpxd; n++, pxd++)
		dbFree(ip, addressPXD(pxd), (s64) lengthPXD(pxd));

      freeKeyName:
	kfree(key.name);

	/* Rollback quota allocation */
	if (rc && quota_allocation)
		DQUOT_FREE_BLOCK(ip, quota_allocation);

      dtSplitUp_Exit:
	return rc;
}
/*
 *	dtSplitPage()
 *
 * function: Split a non-root page of a btree.
 *
 *	A new right page is built in the next unused extent of the split's
 *	pxdlist and linked in after the split page.  When the split page is
 *	the last page of its level and the new entry goes after its last
 *	entry, the new entry is simply placed alone in the new page;
 *	otherwise roughly half of the slots are moved to the new page and
 *	the new entry is inserted in whichever page now covers its index.
 *
 * parameter:
 *	tid	- transaction id
 *	ip	- directory inode
 *	split	- split descriptor: page to split (mp), index/nslot/key/data
 *		  of the entry to insert, and the extent list (pxdlist)
 *	rmpp	- out: metapage of the new right page
 *	rpp	- out: new right page
 *	rpxdp	- out: extent descriptor of the new right page
 *
 * return: 0 - success;
 *	   -EIO - the new page could not be obtained;
 *	   -EDQUOT - quota allocation failed;
 *	   errno - failure reading the old right sibling;
 *	return split and new page pinned;
 */
static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
	    struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rpxdp)
{
	int rc = 0;
	struct metapage *smp;
	dtpage_t *sp;
	struct metapage *rmp;
	dtpage_t *rp;		/* new right page allocated */
	s64 rbn;		/* new right page block number */
	struct metapage *mp;
	dtpage_t *p;
	s64 nextbn;
	struct pxdlist *pxdlist;
	pxd_t *pxd;
	int skip, nextindex, half, left, nxt, off, si;
	struct ldtentry *ldtentry;
	struct idtentry *idtentry;
	u8 *stbl;
	struct dtslot *f;
	int fsi, stblsize;
	int n;
	struct dt_lock *sdtlck, *rdtlck;
	struct tlock *tlck;
	struct dt_lock *dtlck;
	struct lv *slv, *rlv, *lv;

	/* get split page */
	smp = split->mp;
	sp = DT_PAGE(ip, smp);

	/*
	 * allocate the new right page for the split
	 *
	 * the extent comes from the next unused descriptor of the
	 * split's pxdlist.
	 */
	pxdlist = split->pxdlist;
	pxd = &pxdlist->pxd[pxdlist->npxd];
	pxdlist->npxd++;
	rbn = addressPXD(pxd);
	rmp = get_metapage(ip, rbn, PSIZE, 1);
	if (rmp == NULL)
		return -EIO;

	/* Allocate blocks to quota. */
	if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
		release_metapage(rmp);
		return -EDQUOT;
	}

	jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);

	BT_MARK_DIRTY(rmp, ip);
	/*
	 * acquire a transaction lock on the new right page
	 */
	tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW);
	rdtlck = (struct dt_lock *) & tlck->lock;

	rp = (dtpage_t *) rmp->data;
	*rpp = rp;
	rp->header.self = *pxd;

	BT_MARK_DIRTY(smp, ip);
	/*
	 * acquire a transaction lock on the split page
	 *
	 * action:
	 */
	tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY);
	sdtlck = (struct dt_lock *) & tlck->lock;

	/* linelock header of split page */
	ASSERT(sdtlck->index == 0);
	slv = & sdtlck->lv[0];
	slv->offset = 0;
	slv->length = 1;
	sdtlck->index++;

	/*
	 * initialize/update sibling pointers between sp and rp
	 */
	nextbn = le64_to_cpu(sp->header.next);
	rp->header.next = cpu_to_le64(nextbn);
	rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
	sp->header.next = cpu_to_le64(rbn);

	/*
	 * initialize new right page
	 */
	rp->header.flag = sp->header.flag;

	/* compute sorted entry table at start of extent data area */
	rp->header.nextindex = 0;
	rp->header.stblindex = 1;

	n = PSIZE >> L2DTSLOTSIZE;
	rp->header.maxslot = n;
	stblsize = (n + 31) >> L2DTSLOTSIZE;	/* in unit of slot */

	/* init freelist */
	fsi = rp->header.stblindex + stblsize;
	rp->header.freelist = fsi;
	rp->header.freecnt = rp->header.maxslot - fsi;

	/*
	 *	sequential append at tail: append without split
	 *
	 * If splitting the last page on a level because of appending
	 * a entry to it (skip is maxentry), it's likely that the access is
	 * sequential. Adding an empty page on the side of the level is less
	 * work and can push the fill factor much higher than normal.
	 * If we're wrong it's no big deal, we'll just do the split the right
	 * way next time.
	 * (It may look like it's equally easy to do a similar hack for
	 * reverse sorted data, that is, split the tree left,
	 * but it's not. Be my guest.)
	 */
	if (nextbn == 0 && split->index == sp->header.nextindex) {
		/* linelock header + stbl (first slot) of new page */
		rlv = & rdtlck->lv[rdtlck->index];
		rlv->offset = 0;
		rlv->length = 2;
		rdtlck->index++;

		/*
		 * initialize freelist of new right page
		 */
		f = &rp->slot[fsi];
		for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
			f->next = fsi;
		f->next = -1;

		/* insert entry at the first entry of the new right page */
		dtInsertEntry(rp, 0, split->key, split->data, &rdtlck);

		goto out;
	}

	/*
	 *	non-sequential insert (at possibly middle page)
	 */

	/*
	 * update prev pointer of previous right sibling page;
	 */
	if (nextbn != 0) {
		DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
		if (rc) {
			/*
			 * NOTE(review): the quota charged above is not
			 * released on this path; confirm whether the caller
			 * accounts for it.
			 */
			discard_metapage(rmp);
			return rc;
		}

		BT_MARK_DIRTY(mp, ip);
		/*
		 * acquire a transaction lock on the next page
		 */
		tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
		jfs_info("dtSplitPage: tlck = 0x%p, ip = 0x%p, mp=0x%p",
			tlck, ip, mp);
		dtlck = (struct dt_lock *) & tlck->lock;

		/*
		 * linelock header of previous right sibling page
		 *
		 * Do not assume the lock still has a free line vector:
		 * chain a new linelock when it is full, as dtRelink()
		 * does for the same kind of sibling-pointer update.
		 */
		if (dtlck->index >= dtlck->maxcnt)
			dtlck = (struct dt_lock *) txLinelock(dtlck);
		lv = & dtlck->lv[dtlck->index];
		lv->offset = 0;
		lv->length = 1;
		dtlck->index++;

		p->header.prev = cpu_to_le64(rbn);

		DT_PUTPAGE(mp);
	}

	/*
	 * split the data between the split and right pages.
	 */
	skip = split->index;
	half = (PSIZE >> L2DTSLOTSIZE) >> 1;	/* swag */
	left = 0;

	/*
	 *	compute fill factor for split pages
	 *
	 * <nxt> traces the next entry to move to rp
	 * <off> traces the next entry to stay in sp
	 */
	stbl = (u8 *) & sp->slot[sp->header.stblindex];
	nextindex = sp->header.nextindex;
	for (nxt = off = 0; nxt < nextindex; ++off) {
		if (off == skip)
			/* check for fill factor with new entry size */
			n = split->nslot;
		else {
			si = stbl[nxt];
			switch (sp->header.flag & BT_TYPE) {
			case BT_LEAF:
				ldtentry = (struct ldtentry *) & sp->slot[si];
				if (DO_INDEX(ip))
					n = NDTLEAF(ldtentry->namlen);
				else
					n = NDTLEAF_LEGACY(ldtentry->
							   namlen);
				break;

			case BT_INTERNAL:
				idtentry = (struct idtentry *) & sp->slot[si];
				n = NDTINTERNAL(idtentry->namlen);
				break;

			default:
				break;
			}

			++nxt;	/* advance to next entry to move in sp */
		}

		left += n;
		if (left >= half)
			break;
	}

	/* <nxt> points to the 1st entry to move */

	/*
	 *	move entries to right page
	 *
	 * dtMoveEntry() initializes rp and reserves entry for insertion
	 *
	 * split page moved out entries are linelocked;
	 * new/right page moved in entries are linelocked;
	 */
	/* linelock header + stbl of new right page */
	rlv = & rdtlck->lv[rdtlck->index];
	rlv->offset = 0;
	rlv->length = 5;
	rdtlck->index++;

	dtMoveEntry(sp, nxt, rp, &sdtlck, &rdtlck, DO_INDEX(ip));

	sp->header.nextindex = nxt;

	/*
	 * finalize freelist of new right page
	 */
	fsi = rp->header.freelist;
	f = &rp->slot[fsi];
	for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
		f->next = fsi;
	f->next = -1;

	/*
	 * Update directory index table for entries now in right page
	 */
	if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) {
		s64 lblock;

		mp = NULL;
		stbl = DT_GETSTBL(rp);
		for (n = 0; n < rp->header.nextindex; n++) {
			ldtentry = (struct ldtentry *) & rp->slot[stbl[n]];
			modify_index(tid, ip, le32_to_cpu(ldtentry->index),
				     rbn, n, &mp, &lblock);
		}
		if (mp)
			release_metapage(mp);
	}

	/*
	 * the skipped index was on the left page,
	 */
	if (skip <= off) {
		/* insert the new entry in the split page */
		dtInsertEntry(sp, skip, split->key, split->data, &sdtlck);

		/* linelock stbl of split page */
		if (sdtlck->index >= sdtlck->maxcnt)
			sdtlck = (struct dt_lock *) txLinelock(sdtlck);
		slv = & sdtlck->lv[sdtlck->index];
		n = skip >> L2DTSLOTSIZE;
		slv->offset = sp->header.stblindex + n;
		slv->length =
		    ((sp->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1;
		sdtlck->index++;
	}
	/*
	 * the skipped index was on the right page,
	 */
	else {
		/* adjust the skip index to reflect the new position */
		skip -= nxt;

		/* insert the new entry in the right page */
		dtInsertEntry(rp, skip, split->key, split->data, &rdtlck);
	}

      out:
	*rmpp = rmp;
	*rpxdp = *pxd;

	return rc;
}
/*
* dtExtendPage()
*
* function: extend 1st/only directory leaf page
*
* The page takes over the next unused extent of the split's pxdlist.
* If that extent starts at the page's current address the page is
* extended in place; otherwise it is treated as relocated and the old
* extent is recorded for freeing. The sorted entry table is copied to
* the start of the added area, its old slots and the rest of the added
* area are put on the freelist, the new entry is inserted, and the
* extent descriptor in slot 1 of the parent page is updated.
*
* parameter:
* tid - transaction id
* ip - directory inode
* split - split descriptor: page to extend (mp), extent list (pxdlist),
* and index/key/data of the entry to insert
* btstack - traversal stack; the popped frame gives the parent page
*
* return: 0 - success;
* errno - failure (reading the parent page);
* return extended page pinned;
*/
static int dtExtendPage(tid_t tid,
struct inode *ip, struct dtsplit * split, struct btstack * btstack)
{
struct super_block *sb = ip->i_sb;
int rc;
struct metapage *smp, *pmp, *mp;
dtpage_t *sp, *pp;
struct pxdlist *pxdlist;
pxd_t *pxd, *tpxd;
int xlen, xsize;
int newstblindex, newstblsize;
int oldstblindex, oldstblsize;
int fsi, last;
struct dtslot *f;
struct btframe *parent;
int n;
struct dt_lock *dtlck;
s64 xaddr, txaddr;
struct tlock *tlck;
struct pxd_lock *pxdlock;
struct lv *lv;
uint type;
struct ldtentry *ldtentry;
u8 *stbl;
/* get page to extend */
smp = split->mp;
sp = DT_PAGE(ip, smp);
/* get parent/root page */
parent = BT_POP(btstack);
DT_GETPAGE(ip, parent->bn, pmp, PSIZE, pp, rc);
if (rc)
return (rc);
/*
* extend the extent
*/
/* take the next unused extent descriptor of the split's list */
pxdlist = split->pxdlist;
pxd = &pxdlist->pxd[pxdlist->npxd];
pxdlist->npxd++;
/* xaddr: start of the new extent; txaddr: start of the current one */
xaddr = addressPXD(pxd);
tpxd = &sp->header.self;
txaddr = addressPXD(tpxd);
/* in-place extension */
if (xaddr == txaddr) {
type = tlckEXTEND;
}
/* relocation */
else {
type = tlckNEW;
/* save moved extent descriptor for later free */
tlck = txMaplock(tid, ip, tlckDTREE | tlckRELOCATE);
pxdlock = (struct pxd_lock *) & tlck->lock;
pxdlock->flag = mlckFREEPXD;
pxdlock->pxd = sp->header.self;
pxdlock->index = 1;
/*
* Update directory index table to reflect new page address
*/
if (DO_INDEX(ip)) {
s64 lblock;
mp = NULL;
stbl = DT_GETSTBL(sp);
/* entry positions are unchanged; only the page address moves */
for (n = 0; n < sp->header.nextindex; n++) {
ldtentry =
(struct ldtentry *) & sp->slot[stbl[n]];
modify_index(tid, ip,
le32_to_cpu(ldtentry->index),
xaddr, n, &mp, &lblock);
}
if (mp)
release_metapage(mp);
}
}
/*
* extend the page
*/
sp->header.self = *pxd;
jfs_info("dtExtendPage: ip:0x%p smp:0x%p sp:0x%p", ip, smp, sp);
BT_MARK_DIRTY(smp, ip);
/*
* acquire a transaction lock on the extended/leaf page
*/
tlck = txLock(tid, ip, smp, tlckDTREE | type);
dtlck = (struct dt_lock *) & tlck->lock;
lv = & dtlck->lv[0];
/* update buffer extent descriptor of extended page */
/* xlen is in blocks, xsize in bytes */
xlen = lengthPXD(pxd);
xsize = xlen << JFS_SBI(sb)->l2bsize;
/*
* copy old stbl to new stbl at start of extended area
*/
oldstblindex = sp->header.stblindex;
oldstblsize = (sp->header.maxslot + 31) >> L2DTSLOTSIZE;
/* the new stbl starts at the first slot past the old page end */
newstblindex = sp->header.maxslot;
/* n: number of slots of the extended page */
n = xsize >> L2DTSLOTSIZE;
newstblsize = (n + 31) >> L2DTSLOTSIZE;
/* nextindex bytes are copied: one stbl byte per entry */
memcpy(&sp->slot[newstblindex], &sp->slot[oldstblindex],
sp->header.nextindex);
/*
* in-line extension: linelock old area of extended page
*/
if (type == tlckEXTEND) {
/* linelock header */
lv->offset = 0;
lv->length = 1;
dtlck->index++;
lv++;
/* linelock new stbl of extended page */
lv->offset = newstblindex;
lv->length = newstblsize;
}
/*
* relocation: linelock whole relocated area
*/
else {
lv->offset = 0;
lv->length = sp->header.maxslot + newstblsize;
}
/* account for the line vector filled in by either branch above */
dtlck->index++;
sp->header.maxslot = n;
sp->header.stblindex = newstblindex;
/* sp->header.nextindex remains the same */
/*
* add old stbl region at head of freelist
*/
/* each old stbl slot is pushed in front of the current list head */
fsi = oldstblindex;
f = &sp->slot[fsi];
last = sp->header.freelist;
for (n = 0; n < oldstblsize; n++, fsi++, f++) {
f->next = last;
last = fsi;
}
sp->header.freelist = last;
sp->header.freecnt += oldstblsize;
/*
* append free region of newly extended area at tail of freelist
*/
/* init free region of newly extended area */
/* n keeps the first free slot after the new stbl */
fsi = n = newstblindex + newstblsize;
f = &sp->slot[fsi];
for (fsi++; fsi < sp->header.maxslot; f++, fsi++)
f->next = fsi;
f->next = -1;
/* append new free region at tail of old freelist */
fsi = sp->header.freelist;
if (fsi == -1)
sp->header.freelist = n;
else {
/* walk to the last slot of the existing freelist */
do {
f = &sp->slot[fsi];
fsi = f->next;
} while (fsi != -1);
f->next = n;
}
sp->header.freecnt += sp->header.maxslot - n;
/*
* insert the new entry
*/
dtInsertEntry(sp, split->index, split->key, split->data, &dtlck);
BT_MARK_DIRTY(pmp, ip);
/*
* linelock any freeslots residing in old extent
*/
if (type == tlckEXTEND) {
n = sp->header.maxslot >> 2;
if (sp->header.freelist < n)
dtLinelockFreelist(sp, n, &dtlck);
}
/*
* update parent entry on the parent/root page
*/
/*
* acquire a transaction lock on the parent/root page
*/
tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY);
dtlck = (struct dt_lock *) & tlck->lock;
lv = & dtlck->lv[dtlck->index];
/* linelock parent entry - 1st slot */
lv->offset = 1;
lv->length = 1;
dtlck->index++;
/* update the parent pxd for page extension */
tpxd = (pxd_t *) & pp->slot[1];
*tpxd = *pxd;
/* only the parent is unpinned here; smp stays pinned for the caller */
DT_PUTPAGE(pmp);
return 0;
}
/*
* dtSplitRoot()
*
* function:
* split the full root page into
* original/root/split page and new right page
* i.e., root remains fixed in tree anchor (inode) and
* the root is copied to a single new right child page
* since root page << non-root page, and
* the split root page contains a single entry for the
* new right child page.
*
* parameter:
* tid - transaction id
* ip - directory inode
* split - split descriptor: root metapage (mp), extent list (pxdlist),
* and index/key/data of the entry to insert
* rmpp - out: metapage of the new right (child) page
*
* return: 0 - success;
* errno - failure (-EIO if the new page cannot be obtained,
* -EDQUOT if the quota allocation fails);
* return new page pinned;
*/
static int dtSplitRoot(tid_t tid,
struct inode *ip, struct dtsplit * split, struct metapage ** rmpp)
{
struct super_block *sb = ip->i_sb;
struct metapage *smp;
dtroot_t *sp;
struct metapage *rmp;
dtpage_t *rp;
s64 rbn;
int xlen;
int xsize;
struct dtslot *f;
s8 *stbl;
int fsi, stblsize, n;
struct idtentry *s;
pxd_t *ppxd;
struct pxdlist *pxdlist;
pxd_t *pxd;
struct dt_lock *dtlck;
struct tlock *tlck;
struct lv *lv;
/* get split root page */
smp = split->mp;
sp = &JFS_IP(ip)->i_dtroot;
/*
* allocate/initialize a single (right) child page
*
* N.B. at first split, a one (or two) block to fit new entry
* is allocated; at subsequent split, a full page is allocated;
*/
/* take the next unused extent descriptor of the split's list */
pxdlist = split->pxdlist;
pxd = &pxdlist->pxd[pxdlist->npxd];
pxdlist->npxd++;
rbn = addressPXD(pxd);
/* xlen is in blocks, xsize in bytes */
xlen = lengthPXD(pxd);
xsize = xlen << JFS_SBI(sb)->l2bsize;
rmp = get_metapage(ip, rbn, xsize, 1);
if (!rmp)
return -EIO;
rp = rmp->data;
/* Allocate blocks to quota. */
if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
release_metapage(rmp);
return -EDQUOT;
}
BT_MARK_DIRTY(rmp, ip);
/*
* acquire a transaction lock on the new right page
*/
tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW);
dtlck = (struct dt_lock *) & tlck->lock;
/* the child keeps the root's leaf/internal type but not BT_ROOT */
rp->header.flag =
(sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
rp->header.self = *pxd;
/* initialize sibling pointers */
rp->header.next = 0;
rp->header.prev = 0;
/*
* move in-line root page into new right page extent
*/
/* linelock header + copied entries + new stbl (1st slot) in new page */
ASSERT(dtlck->index == 0);
lv = & dtlck->lv[0];
lv->offset = 0;
lv->length = 10; /* 1 + 8 + 1 */
dtlck->index++;
/* n: number of slots of the new page */
n = xsize >> L2DTSLOTSIZE;
rp->header.maxslot = n;
stblsize = (n + 31) >> L2DTSLOTSIZE;
/* copy old stbl to new stbl at start of extended area */
rp->header.stblindex = DTROOTMAXSLOT;
stbl = (s8 *) & rp->slot[DTROOTMAXSLOT];
memcpy(stbl, sp->header.stbl, sp->header.nextindex);
rp->header.nextindex = sp->header.nextindex;
/* copy old data area to start of new data area */
memcpy(&rp->slot[1], &sp->slot[1], IDATASIZE);
/*
* append free region of newly extended area at tail of freelist
*/
/* init free region of newly extended area */
/* n keeps the first free slot after the new stbl */
fsi = n = DTROOTMAXSLOT + stblsize;
f = &rp->slot[fsi];
for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
f->next = fsi;
f->next = -1;
/* append new free region at tail of old freelist */
fsi = sp->header.freelist;
if (fsi == -1)
rp->header.freelist = n;
else {
rp->header.freelist = fsi;
/* walk the copied freelist in rp to its last slot */
do {
f = &rp->slot[fsi];
fsi = f->next;
} while (fsi != -1);
f->next = n;
}
rp->header.freecnt = sp->header.freecnt + rp->header.maxslot - n;
/*
* Update directory index table for entries now in right page
*/
if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) {
s64 lblock;
struct metapage *mp = NULL;
struct ldtentry *ldtentry;
stbl = DT_GETSTBL(rp);
for (n = 0; n < rp->header.nextindex; n++) {
ldtentry = (struct ldtentry *) & rp->slot[stbl[n]];
modify_index(tid, ip, le32_to_cpu(ldtentry->index),
rbn, n, &mp, &lblock);
}
if (mp)
release_metapage(mp);
}
/*
* insert the new entry into the new right/child page
* (skip index in the new right page will not change)
*/
dtInsertEntry(rp, split->index, split->key, split->data, &dtlck);
/*
* reset parent/root page
*
* set the 1st entry offset to 0, which force the left-most key
* at any level of the tree to be less than any search key.
*
* The btree comparison code guarantees that the left-most key on any
* level of the tree is never used, so it doesn't need to be filled in.
*/
BT_MARK_DIRTY(smp, ip);
/*
* acquire a transaction lock on the root page (in-memory inode)
*/
tlck = txLock(tid, ip, smp, tlckDTREE | tlckNEW | tlckBTROOT);
dtlck = (struct dt_lock *) & tlck->lock;
/* linelock root */
ASSERT(dtlck->index == 0);
lv = & dtlck->lv[0];
lv->offset = 0;
lv->length = DTROOTMAXSLOT;
dtlck->index++;
/* update page header of root */
if (sp->header.flag & BT_LEAF) {
sp->header.flag &= ~BT_LEAF;
sp->header.flag |= BT_INTERNAL;
}
/* init the first entry */
/* the entry starts with the child's extent descriptor and has no key */
s = (struct idtentry *) & sp->slot[DTENTRYSTART];
ppxd = (pxd_t *) s;
*ppxd = *pxd;
s->next = -1;
s->namlen = 0;
stbl = sp->header.stbl;
stbl[0] = DTENTRYSTART;
sp->header.nextindex = 1;
/* init freelist */
fsi = DTENTRYSTART + 1;
f = &sp->slot[fsi];
/* init free region of remaining area */
for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++)
f->next = fsi;
f->next = -1;
sp->header.freelist = DTENTRYSTART + 1;
sp->header.freecnt = DTROOTMAXSLOT - (DTENTRYSTART + 1);
*rmpp = rmp;
return 0;
}
/*
 *	dtDelete()
 *
 * function: delete the entry(s) referenced by a key.
 *
 *	The entry is located with dtSearch().  For an indexed directory
 *	the entry's directory index table slot is freed first, recording
 *	the index of the following entry.  If the entry is the only one in
 *	its leaf page the page is removed with dtDeleteUp(); otherwise the
 *	entry is deleted from the page and, for an indexed directory, the
 *	index table positions of the entries after it are updated.
 *
 * parameter:
 *	tid	- transaction id
 *	ip	- directory inode
 *	key	- name of the entry to delete
 *	ino	- passed through to dtSearch()
 *	flag	- passed through to dtSearch()
 *
 * return: 0 - success;
 *	   errno - failure reported by dtSearch() or dtDeleteUp();
 */
int dtDelete(tid_t tid,
	 struct inode *ip, struct component_name * key, ino_t * ino, int flag)
{
	int rc = 0;
	s64 bn;
	struct metapage *mp, *imp;
	dtpage_t *p;
	int index;
	struct btstack btstack;
	struct dt_lock *dtlck;
	struct tlock *tlck;
	struct lv *lv;
	int i;
	struct ldtentry *ldtentry;
	u8 *stbl;
	u32 table_index, next_index;
	struct metapage *nmp;
	dtpage_t *np;

	/*
	 *	search for the entry to delete:
	 *
	 * dtSearch() returns (leaf page pinned, index at which to delete).
	 */
	if ((rc = dtSearch(ip, key, ino, &btstack, flag)))
		return rc;

	/* retrieve search result */
	DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/*
	 * We need to put the index of the next entry into the
	 * directory index table in order to resume a readdir from this
	 * entry.
	 */
	if (DO_INDEX(ip)) {
		stbl = DT_GETSTBL(p);
		ldtentry = (struct ldtentry *) & p->slot[stbl[index]];
		table_index = le32_to_cpu(ldtentry->index);
		if (index == (p->header.nextindex - 1)) {
			/*
			 * Last entry in this leaf page
			 */
			if ((p->header.flag & BT_ROOT)
			    || (p->header.next == 0))
				next_index = -1;
			else {
				/* Read next leaf page */
				DT_GETPAGE(ip, le64_to_cpu(p->header.next),
					   nmp, PSIZE, np, rc);
				if (rc) {
					/*
					 * Finding the next entry is best
					 * effort: record "no next entry"
					 * and do not let this read error
					 * become the result of a deletion
					 * that is carried out below.
					 */
					next_index = -1;
					rc = 0;
				} else {
					stbl = DT_GETSTBL(np);
					ldtentry =
					    (struct ldtentry *) & np->
					    slot[stbl[0]];
					next_index =
					    le32_to_cpu(ldtentry->index);
					DT_PUTPAGE(nmp);
				}
			}
		} else {
			ldtentry =
			    (struct ldtentry *) & p->slot[stbl[index + 1]];
			next_index = le32_to_cpu(ldtentry->index);
		}
		free_index(tid, ip, table_index, next_index);
	}
	/*
	 * the leaf page becomes empty, delete the page
	 */
	if (p->header.nextindex == 1) {
		/* delete empty page */
		rc = dtDeleteUp(tid, ip, mp, p, &btstack);
	}
	/*
	 * the leaf page has other entries remaining:
	 *
	 * delete the entry from the leaf page.
	 */
	else {
		BT_MARK_DIRTY(mp, ip);
		/*
		 * acquire a transaction lock on the leaf page
		 */
		tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
		dtlck = (struct dt_lock *) & tlck->lock;

		/*
		 * Do not assume that dtlck->index will be zero.  During a
		 * rename within a directory, this transaction may have
		 * modified this page already when adding the new entry.
		 */

		/* linelock header */
		if (dtlck->index >= dtlck->maxcnt)
			dtlck = (struct dt_lock *) txLinelock(dtlck);
		lv = & dtlck->lv[dtlck->index];
		lv->offset = 0;
		lv->length = 1;
		dtlck->index++;

		/* linelock stbl of non-root leaf page */
		if (!(p->header.flag & BT_ROOT)) {
			if (dtlck->index >= dtlck->maxcnt)
				dtlck = (struct dt_lock *) txLinelock(dtlck);
			lv = & dtlck->lv[dtlck->index];
			i = index >> L2DTSLOTSIZE;
			lv->offset = p->header.stblindex + i;
			lv->length =
			    ((p->header.nextindex - 1) >> L2DTSLOTSIZE) -
			    i + 1;
			dtlck->index++;
		}

		/* free the leaf entry */
		dtDeleteEntry(p, index, &dtlck);

		/*
		 * Update directory index table for entries moved in stbl
		 */
		if (DO_INDEX(ip) && index < p->header.nextindex) {
			s64 lblock;

			imp = NULL;
			stbl = DT_GETSTBL(p);
			for (i = index; i < p->header.nextindex; i++) {
				ldtentry =
				    (struct ldtentry *) & p->slot[stbl[i]];
				modify_index(tid, ip,
					     le32_to_cpu(ldtentry->index),
					     bn, i, &imp, &lblock);
			}
			if (imp)
				release_metapage(imp);
		}

		DT_PUTPAGE(mp);
	}

	return rc;
}
/*
* dtDeleteUp()
*
* function:
* free empty pages as propagating deletion up the tree
*
* An empty root is re-initialized in place with dtInitRoot(). An empty
* non-root page is recorded for freeing, unlinked from its siblings and
* discarded; its router entry is then deleted from the parent, and the
* same is repeated for each parent that becomes empty in turn.
*
* parameter:
* tid - transaction id
* ip - directory inode
* fmp - metapage of the leaf page that has become empty
* fp - the leaf page that has become empty
* btstack - traversal stack giving the parent page/index of each level
*
* return: 0 - success;
* errno - failure (from dtRelink() or reading a parent page);
*/
static int dtDeleteUp(tid_t tid, struct inode *ip,
struct metapage * fmp, dtpage_t * fp, struct btstack * btstack)
{
int rc = 0;
struct metapage *mp;
dtpage_t *p;
int index, nextindex;
int xlen;
struct btframe *parent;
struct dt_lock *dtlck;
struct tlock *tlck;
struct lv *lv;
struct pxd_lock *pxdlock;
int i;
/*
* keep the root leaf page which has become empty
*/
if (BT_IS_ROOT(fmp)) {
/*
* reset the root
*
* dtInitRoot() acquires txlock on the root
*/
dtInitRoot(tid, ip, PARENT(ip));
DT_PUTPAGE(fmp);
return 0;
}
/*
* free the non-root leaf page
*/
/*
* acquire a transaction lock on the page
*
* write FREEXTENT|NOREDOPAGE log record
* N.B. linelock is overlaid as freed extent descriptor, and
* the buffer page is freed;
*/
tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE);
pxdlock = (struct pxd_lock *) & tlck->lock;
pxdlock->flag = mlckFREEPXD;
pxdlock->pxd = fp->header.self;
pxdlock->index = 1;
/* update sibling pointers */
if ((rc = dtRelink(tid, ip, fp))) {
BT_PUTPAGE(fmp);
return rc;
}
/* xlen: length of the freed page's extent, in blocks */
xlen = lengthPXD(&fp->header.self);
/* Free quota allocation. */
DQUOT_FREE_BLOCK(ip, xlen);
/* free/invalidate its buffer page */
discard_metapage(fmp);
/*
* propagate page deletion up the directory tree
*
* If the delete from the parent page makes it empty,
* continue all the way up the tree.
* stop if the root page is reached (which is never deleted) or
* if the entry deletion does not empty the page.
*/
while ((parent = BT_POP(btstack)) != NULL) {
/* pin the parent page <p> */
DT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc);
if (rc)
return rc;
/*
* free the extent of the child page deleted
*/
/* index of the router entry for the deleted child page */
index = parent->index;
/*
* delete the entry for the child page from parent
*/
nextindex = p->header.nextindex;
/*
* the parent has the single entry being deleted:
*
* free the parent page which has become empty.
*/
if (nextindex == 1) {
/*
* keep the root internal page which has become empty
*/
if (p->header.flag & BT_ROOT) {
/*
* reset the root
*
* dtInitRoot() acquires txlock on the root
*/
dtInitRoot(tid, ip, PARENT(ip));
DT_PUTPAGE(mp);
return 0;
}
/*
* free the parent page
*/
else {
/*
* acquire a transaction lock on the page
*
* write FREEXTENT|NOREDOPAGE log record
*/
tlck =
txMaplock(tid, ip,
tlckDTREE | tlckFREE);
pxdlock = (struct pxd_lock *) & tlck->lock;
pxdlock->flag = mlckFREEPXD;
pxdlock->pxd = p->header.self;
pxdlock->index = 1;
/* update sibling pointers */
if ((rc = dtRelink(tid, ip, p))) {
DT_PUTPAGE(mp);
return rc;
}
xlen = lengthPXD(&p->header.self);
/* Free quota allocation */
DQUOT_FREE_BLOCK(ip, xlen);
/* free/invalidate its buffer page */
discard_metapage(mp);
/* propagate up */
continue;
}
}
/*
* the parent has other entries remaining:
*
* delete the router entry from the parent page.
*/
BT_MARK_DIRTY(mp, ip);
/*
* acquire a transaction lock on the page
*
* action: router entry deletion
*/
tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
dtlck = (struct dt_lock *) & tlck->lock;
/* linelock header */
if (dtlck->index >= dtlck->maxcnt)
dtlck = (struct dt_lock *) txLinelock(dtlck);
lv = & dtlck->lv[dtlck->index];
lv->offset = 0;
lv->length = 1;
dtlck->index++;
/* linelock stbl of non-root parent page */
if (!(p->header.flag & BT_ROOT)) {
/* use the next line vector, chaining a new linelock if full */
if (dtlck->index < dtlck->maxcnt)
lv++;
else {
dtlck = (struct dt_lock *) txLinelock(dtlck);
lv = & dtlck->lv[0];
}
i = index >> L2DTSLOTSIZE;
lv->offset = p->header.stblindex + i;
lv->length =
((p->header.nextindex - 1) >> L2DTSLOTSIZE) -
i + 1;
dtlck->index++;
}
/* free the router entry */
dtDeleteEntry(p, index, &dtlck);
/* reset key of new leftmost entry of level (for consistency) */
if (index == 0 &&
((p->header.flag & BT_ROOT) || p->header.prev == 0))
dtTruncateEntry(p, 0, &dtlck);
/* unpin the parent page */
DT_PUTPAGE(mp);
/* exit propagation up */
break;
}
/*
* legacy (non-indexed) directory: shrink i_size by one page.
* Not reached on the early-return paths above.
*/
if (!DO_INDEX(ip))
ip->i_size -= PSIZE;
return 0;
}
#ifdef _NOTYET
/*
* NAME: dtRelocate()
*
* FUNCTION: relocate dtpage (internal or leaf) of directory;
* This function is mainly used by defragfs utility.
*
* PARAMETERS:
* tid - transaction id
* ip - directory inode
* lmxaddr - address of the leftmost page of the level holding the
* parent router entry (passed to dtSearchNode())
* opxd - extent descriptor of the page to relocate (source extent)
* nxaddr - address of the destination extent
*
* RETURN: 0 - success;
* errno - failure (from dtSearchNode() or reading a page);
*
* NOTE: only compiled when _NOTYET is defined.
*/
int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
s64 nxaddr)
{
int rc = 0;
struct metapage *mp, *pmp, *lmp, *rmp;
dtpage_t *p, *pp, *rp = 0, *lp= 0;
s64 bn;
int index;
struct btstack btstack;
pxd_t *pxd;
s64 oxaddr, nextbn, prevbn;
int xlen, xsize;
struct tlock *tlck;
struct dt_lock *dtlck;
struct pxd_lock *pxdlock;
s8 *stbl;
struct lv *lv;
oxaddr = addressPXD(opxd);
xlen = lengthPXD(opxd);
jfs_info("dtRelocate: lmxaddr:%Ld xaddr:%Ld:%Ld xlen:%d",
(long long)lmxaddr, (long long)oxaddr, (long long)nxaddr,
xlen);
/*
* 1. get the internal parent dtpage covering
* router entry for the target page to be relocated;
*/
rc = dtSearchNode(ip, lmxaddr, opxd, &btstack);
if (rc)
return rc;
/* retrieve search result */
DT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
jfs_info("dtRelocate: parent router entry validated.");
/*
* 2. relocate the target dtpage
*/
/* read in the target page from src extent */
DT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
if (rc) {
/* release the pinned parent page */
DT_PUTPAGE(pmp);
return rc;
}
/*
* read in sibling pages if any to update sibling pointers;
*/
rmp = NULL;
if (p->header.next) {
nextbn = le64_to_cpu(p->header.next);
DT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc);
if (rc) {
DT_PUTPAGE(mp);
DT_PUTPAGE(pmp);
return (rc);
}
}
lmp = NULL;
if (p->header.prev) {
prevbn = le64_to_cpu(p->header.prev);
DT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc);
if (rc) {
DT_PUTPAGE(mp);
DT_PUTPAGE(pmp);
if (rmp)
DT_PUTPAGE(rmp);
return (rc);
}
}
/* at this point, all dtpages to be updated are in memory */
/*
* update sibling pointers of sibling dtpages if any;
*/
if (lmp) {
tlck = txLock(tid, ip, lmp, tlckDTREE | tlckRELINK);
dtlck = (struct dt_lock *) & tlck->lock;
/* linelock header */
ASSERT(dtlck->index == 0);
lv = & dtlck->lv[0];
lv->offset = 0;
lv->length = 1;
dtlck->index++;
lp->header.next = cpu_to_le64(nxaddr);
DT_PUTPAGE(lmp);
}
if (rmp) {
tlck = txLock(tid, ip, rmp, tlckDTREE | tlckRELINK);
dtlck = (struct dt_lock *) & tlck->lock;
/* linelock header */
ASSERT(dtlck->index == 0);
lv = & dtlck->lv[0];
lv->offset = 0;
lv->length = 1;
dtlck->index++;
rp->header.prev = cpu_to_le64(nxaddr);
DT_PUTPAGE(rmp);
}
/*
* update the target dtpage to be relocated
*
* write LOG_REDOPAGE of LOG_NEW type for dst page
* for the whole target page (logredo() will apply
* after image and update bmap for allocation of the
* dst extent), and update bmap for allocation of
* the dst extent;
*/
tlck = txLock(tid, ip, mp, tlckDTREE | tlckNEW);
dtlck = (struct dt_lock *) & tlck->lock;
/* linelock header */
ASSERT(dtlck->index == 0);
lv = & dtlck->lv[0];
/* update the self address in the dtpage header */
pxd = &p->header.self;
PXDaddress(pxd, nxaddr);
/* the dst page is the same as the src page, i.e.,
* linelock for afterimage of the whole page;
*/
lv->offset = 0;
lv->length = p->header.maxslot;
dtlck->index++;
/* update the buffer extent descriptor of the dtpage */
/* NOTE(review): xsize is computed here but not used afterwards */
xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize;
/* unpin the relocated page */
DT_PUTPAGE(mp);
jfs_info("dtRelocate: target dtpage relocated.");
/* the moved extent is dtpage, then a LOG_NOREDOPAGE log rec
* needs to be written (in logredo(), the LOG_NOREDOPAGE log rec
* will also force a bmap update ).
*/
/*
* 3. acquire maplock for the source extent to be freed;
*/
/* for dtpage relocation, write a LOG_NOREDOPAGE record
* for the source dtpage (logredo() will init NoRedoPage
* filter and will also update bmap for free of the source
* dtpage), and update bmap for free of the source dtpage;
*/
tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE);
pxdlock = (struct pxd_lock *) & tlck->lock;
pxdlock->flag = mlckFREEPXD;
PXDaddress(&pxdlock->pxd, oxaddr);
PXDlength(&pxdlock->pxd, xlen);
pxdlock->index = 1;
/*
* 4. update the parent router entry for relocation;
*
* acquire tlck for the parent entry covering the target dtpage;
* write LOG_REDOPAGE to apply after image only;
*/
jfs_info("dtRelocate: update parent router entry.");
tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY);
dtlck = (struct dt_lock *) & tlck->lock;
lv = & dtlck->lv[dtlck->index];
/* update the PXD with the new address */
stbl = DT_GETSTBL(pp);
pxd = (pxd_t *) & pp->slot[stbl[index]];
PXDaddress(pxd, nxaddr);
/* linelock the single slot holding the router entry's pxd */
lv->offset = stbl[index];
lv->length = 1;
dtlck->index++;
/* unpin the parent dtpage */
DT_PUTPAGE(pmp);
return rc;
}
/*
 * NAME:	dtSearchNode()
 *
 * FUNCTION:	Search for an dtpage containing a specified address
 *		This function is mainly used by defragfs utility.
 *
 * NOTE:	Search result on stack, the found page is pinned at exit.
 *		The result page must be an internal dtpage.
 *		lmxaddr give the address of the left most page of the
 *		dtree level, in which the required dtpage resides.
 *
 * RETURN:	0 - router entry matching kpxd found;
 *		-ESTALE - level or entry not found;
 *		errno - failure reading a page;
 */
static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd,
			struct btstack * btstack)
{
	int rc = 0;
	s64 bn = 0;		/* by convention, root bn = 0 */
	struct metapage *mp;
	dtpage_t *p;
	int pgsize = 288;	/* initial in-line directory */
	s8 *stbl;
	int slot;
	pxd_t *entry;
	struct btframe *frame;

	BT_CLR(btstack);	/* reset stack */

	/*
	 * phase 1: walk down the leftmost edge of the tree until the
	 * page whose address is lmxaddr (0 meaning the root) is pinned
	 */
	for (;;) {
		/* get/pin the page to search */
		DT_GETPAGE(ip, bn, mp, pgsize, p, rc);
		if (rc)
			return rc;

		/* is this the leftmost page of the wanted level ? */
		if (p->header.flag & BT_ROOT) {
			if (lmxaddr == 0)
				break;
		} else if (addressPXD(&p->header.self) == lmxaddr) {
			break;
		}

		/* a leaf has no children to descend into */
		if (p->header.flag & BT_LEAF) {
			DT_PUTPAGE(mp);
			return -ESTALE;
		}

		/* follow the leftmost entry down to its child page */
		stbl = DT_GETSTBL(p);
		entry = (pxd_t *) & p->slot[stbl[0]];
		bn = addressPXD(entry);
		pgsize = lengthPXD(entry) << JFS_SBI(ip->i_sb)->l2bsize;

		/* unpin the parent page */
		DT_PUTPAGE(mp);
	}

	/*
	 * phase 2: scan the pages of this level from left to right for
	 * the router entry whose extent matches kpxd
	 */
	for (;;) {
		stbl = DT_GETSTBL(p);
		for (slot = 0; slot < p->header.nextindex; slot++) {
			entry = (pxd_t *) & p->slot[stbl[slot]];
			if (addressPXD(entry) != addressPXD(kpxd) ||
			    lengthPXD(entry) != lengthPXD(kpxd))
				continue;

			/* found: record the hit, leaving the page pinned */
			frame = btstack->top;
			frame->bn = bn;
			frame->index = slot;
			frame->mp = mp;
			return 0;
		}

		/* no right sibling left to look at */
		if (!p->header.next) {
			DT_PUTPAGE(mp);
			return -ESTALE;
		}
		bn = le64_to_cpu(p->header.next);

		/* swap the current page for its right sibling */
		DT_PUTPAGE(mp);
		DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;
	}
}
#endif /* _NOTYET */
/*
 *	dtRelink()
 *
 * function:
 *	link around a freed page.
 *
 * parameter:
 *	tid	- transaction id
 *	ip	- directory inode
 *	p	- page to be freed
 *
 * return: 0 - success;
 *	   errno - failure reading a sibling page;
 */
static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p)
{
	int rc;
	struct metapage *smp;	/* sibling page being patched */
	dtpage_t *sib;
	struct tlock *tlck;
	struct dt_lock *dtlck;
	struct lv *hdr;
	s64 nextbn = le64_to_cpu(p->header.next);
	s64 prevbn = le64_to_cpu(p->header.prev);

	/* right sibling: make its prev pointer skip the freed page */
	if (nextbn != 0) {
		DT_GETPAGE(ip, nextbn, smp, PSIZE, sib, rc);
		if (rc)
			return rc;

		BT_MARK_DIRTY(smp, ip);

		/* transaction lock on the next page for the relink */
		tlck = txLock(tid, ip, smp, tlckDTREE | tlckRELINK);
		jfs_info("dtRelink nextbn: tlck = 0x%p, ip = 0x%p, mp=0x%p",
			 tlck, ip, smp);
		dtlck = (struct dt_lock *) & tlck->lock;

		/* linelock the header, chaining a new linelock if full */
		if (dtlck->index >= dtlck->maxcnt)
			dtlck = (struct dt_lock *) txLinelock(dtlck);
		hdr = & dtlck->lv[dtlck->index];
		hdr->offset = 0;
		hdr->length = 1;
		dtlck->index++;

		sib->header.prev = cpu_to_le64(prevbn);
		DT_PUTPAGE(smp);
	}

	/* left sibling: make its next pointer skip the freed page */
	if (prevbn != 0) {
		DT_GETPAGE(ip, prevbn, smp, PSIZE, sib, rc);
		if (rc)
			return rc;

		BT_MARK_DIRTY(smp, ip);

		/* transaction lock on the prev page for the relink */
		tlck = txLock(tid, ip, smp, tlckDTREE | tlckRELINK);
		jfs_info("dtRelink prevbn: tlck = 0x%p, ip = 0x%p, mp=0x%p",
			 tlck, ip, smp);
		dtlck = (struct dt_lock *) & tlck->lock;

		/* linelock the header, chaining a new linelock if full */
		if (dtlck->index >= dtlck->maxcnt)
			dtlck = (struct dt_lock *) txLinelock(dtlck);
		hdr = & dtlck->lv[dtlck->index];
		hdr->offset = 0;
		hdr->length = 1;
		dtlck->index++;

		sib->header.next = cpu_to_le64(nextbn);
		DT_PUTPAGE(smp);
	}

	return 0;
}
/*
* dtInitRoot()
*
* initialize directory root (inline in inode)
*/
void dtInitRoot(tid_t tid, struct inode *ip, u32 idotdot)
{
struct jfs_inode_info *jfs_ip = JFS_IP(ip);
dtroot_t *p;
int fsi;
struct dtslot *f;
struct tlock *tlck;
struct dt_lock *dtlck;
struct lv *lv;
u16 xflag_save;
/*
* If this was previously a non-empty directory, we need to remove
* the old directory table.
*/
if (DO_INDEX(ip)) {
if (!jfs_dirtable_inline(ip)) {
struct tblock *tblk = tid_to_tblock(tid);
/*
* We're playing games with the tid's xflag. If
* we're removing a regular file, the file's xtree
* is committed with COMMIT_PMAP, but we always
* commit the directories xtree with COMMIT_PWMAP.
*/
xflag_save = tblk->xflag;
tblk->xflag = 0;
/*
* xtTruncate isn't guaranteed to fully truncate
* the xtree. The caller needs to check i_size
* after committing the transaction to see if
* additional truncation is needed. The
* COMMIT_Stale flag tells caller that we
* initiated the truncation.
*/
xtTruncate(tid, ip, 0, COMMIT_PWMAP);
set_cflag(COMMIT_Stale, ip);
tblk->xflag = xflag_save;
} else
ip->i_size = 1;
jfs_ip->next_index = 2;
} else
ip->i_size = IDATASIZE;
/*
* acquire a transaction lock on the root
*