xfsdump: fix race condition between lseek() and read()/write()

There's a race condition in the [get|put]_invtrecord() routines: an
lseek() followed by a read()/write() is not atomic, so the file offset
might be changed between the lseek() and the read()/write().

xfs/302 catches this failure as:
xfsdump: drive 1: INV : Unknown version 0 - Expected version 1
xfsdump: inv_core.c:66: get_counters: Assertion `((invt_counter_t *)(*cntpp))->ic_vernum == (inv_version_t) 1' failed.

It can be reproduced by running a multi-stream dump in a tight loop:
  mount /dev/<dev> /mnt/xfs
  mkdir /mnt/xfs/dumpdir
  # populate dumpdir here
  while xfsdump -M l1 -M l2 -f d1 -f d2 -L ses /mnt/xfs -s dumpdir; do
  	:
  done

Fix it by replacing the "lseek(); read()/write()" sequence with
pread()/pwrite(), which make the seek and the I/O a single atomic operation.

Also convert all *_SEEKCUR callers to the "SEEK_SET" variants and
remove the *_SEEKCUR macros, because they depend on the current file
offset being maintained, but pread()/pwrite() don't change the file
offset.

And in inventory/testmain.c, get|put_invtrecord() are called
directly, not from the GET|PUT_REC_* macros, so maintain the offset
explicitly there.

Signed-off-by: Eryu Guan <eguan@redhat.com>
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>

diff --git a/common/inventory.c b/common/inventory.c
index d1b810c..0e9c256 100644
--- a/common/inventory.c
+++ b/common/inventory.c
@@ -471,8 +471,8 @@
 	}
 			
 	if (dowrite) {
-		rval = PUT_REC_NOLOCK_SEEKCUR( fd, &strm, sizeof( invt_stream_t ),
-					       (off64_t) -(sizeof( invt_stream_t )) );
+		rval = PUT_REC_NOLOCK(fd, &strm, sizeof(invt_stream_t),
+				      tok->md_stream_off);
 	}
  end:
 	INVLOCK( fd, LOCK_UN );
diff --git a/inventory/inv_api.c b/inventory/inv_api.c
index acca40b..46fdde8 100644
--- a/inventory/inv_api.c
+++ b/inventory/inv_api.c
@@ -409,9 +409,8 @@
 		}
 			
 		if (dowrite) {
-			rval = PUT_REC_NOLOCK_SEEKCUR( fd, &strm, 
-			             sizeof( invt_stream_t ),
-				     -(off64_t)(sizeof( invt_stream_t )) );
+			rval = PUT_REC_NOLOCK(fd, &strm, sizeof(invt_stream_t),
+					      tok->md_stream_off);
 		}
 	}
 
diff --git a/inventory/inv_core.c b/inventory/inv_core.c
index a17c2c9..5ef519c 100644
--- a/inventory/inv_core.c
+++ b/inventory/inv_core.c
@@ -112,7 +112,7 @@
 
 int
 get_invtrecord( int fd, void *buf, size_t bufsz, off64_t off, 
-	        int whence, bool_t dolock )
+		bool_t dolock )
 {
 	int  nread;
 	
@@ -121,19 +121,10 @@
 	if ( dolock ) 
 		INVLOCK( fd, LOCK_SH );
 
-	if ( lseek( fd, (off_t)off, whence ) < 0 ) {
-		INV_PERROR( _("Error in reading inventory record "
-			      "(lseek failed): ") );
-		if ( dolock ) 
-			INVLOCK( fd, LOCK_UN );
-		return -1;
-	}
-	
-	nread = read( fd, buf, bufsz );
-
+	nread = pread(fd, buf, bufsz, (off_t)off);
 	if (  nread != (int) bufsz ) {
 		INV_PERROR( _("Error in reading inventory record :") );
-		if ( dolock ) 
+		if ( dolock )
 			INVLOCK( fd, LOCK_UN );
 		return -1;
 	}
@@ -154,23 +145,15 @@
 /*----------------------------------------------------------------------*/
 
 int
-put_invtrecord( int fd, void *buf, size_t bufsz, off64_t off, 
-	        int whence, bool_t dolock )
+put_invtrecord( int fd, void *buf, size_t bufsz, off64_t off, bool_t dolock )
 {
 	int nwritten;
 	
 	if ( dolock )
 		INVLOCK( fd, LOCK_EX );
 	
-	if ( lseek( fd, (off_t)off, whence ) < 0 ) {
-		INV_PERROR( _("Error in writing inventory record "
-			      "(lseek failed): ") );
-		if ( dolock ) 
-			INVLOCK( fd, LOCK_UN );
-		return -1;
-	}
-	
-	if (( nwritten = write( fd, buf, bufsz ) ) != (int) bufsz ) {
+	nwritten = pwrite(fd, buf, bufsz, (off_t)off);
+	if (nwritten != (int) bufsz ) {
 		INV_PERROR( _("Error in writing inventory record :") );
 		if ( dolock )
 			INVLOCK( fd, LOCK_UN );
diff --git a/inventory/inv_idx.c b/inventory/inv_idx.c
index 95529e8..cd9b9cb 100644
--- a/inventory/inv_idx.c
+++ b/inventory/inv_idx.c
@@ -341,8 +341,8 @@
 			      ent.ie_timeperiod.tp_start,
 			      ent.ie_timeperiod.tp_end );
 #endif
-	rval = PUT_REC_NOLOCK_SEEKCUR( fd, &ent, sizeof( invt_entry_t ),
-				      -(off64_t)(sizeof( invt_entry_t )));
+	rval = PUT_REC_NOLOCK(fd, &ent, sizeof(invt_entry_t),
+			      tok->sd_invtok->d_invindex_off);
 	
 #ifdef INVT_DEBUG
 	{
diff --git a/inventory/inv_priv.h b/inventory/inv_priv.h
index 1690271..aa94a33 100644
--- a/inventory/inv_priv.h
+++ b/inventory/inv_priv.h
@@ -298,13 +298,10 @@
 
 
 #define GET_REC( fd, buf, sz, off )  \
-                 get_invtrecord( fd, buf, sz, off, SEEK_SET, INVT_DOLOCK )
+                 get_invtrecord( fd, buf, sz, off, INVT_DOLOCK )
 
 #define GET_REC_NOLOCK( fd, buf, sz, off )  \
-                 get_invtrecord( fd, buf, sz, off, SEEK_SET, INVT_DONTLOCK )
-
-#define GET_REC_SEEKCUR( fd, buf, sz, off )  \
-                 get_invtrecord( fd, buf, sz, off, SEEK_CUR, INVT_DOLOCK )
+                 get_invtrecord( fd, buf, sz, off, INVT_DONTLOCK )
 
 #define GET_ALLHDRS_N_CNTS( fd, h, c, hsz, csz ) \
                  get_headerinfo( fd, h, c, hsz, csz, INVT_DOLOCK )
@@ -313,16 +310,10 @@
                  get_headerinfo( fd, h, c, hsz, csz, INVT_DONTLOCK )
 
 #define PUT_REC( fd, buf, sz, off )  \
-                 put_invtrecord( fd, buf, sz, off, SEEK_SET, INVT_DOLOCK )
+                 put_invtrecord( fd, buf, sz, off, INVT_DOLOCK )
 
 #define PUT_REC_NOLOCK( fd, buf, sz, off )  \
-                 put_invtrecord( fd, buf, sz, off, SEEK_SET, INVT_DONTLOCK )
-
-#define PUT_REC_SEEKCUR( fd, buf, sz, off )  \
-                 put_invtrecord( fd, buf, sz, off, SEEK_CUR, INVT_DOLOCK )
-
-#define PUT_REC_NOLOCK_SEEKCUR( fd, buf, sz, off )  \
-                 put_invtrecord( fd, buf, sz, off, SEEK_CUR, INVT_DONTLOCK )
+                 put_invtrecord( fd, buf, sz, off, INVT_DONTLOCK )
 
 
 #define GET_COUNTERS( fd, cnt ) get_counters( fd, (void **)(cnt), \
@@ -515,10 +506,10 @@
 get_invtentry( char *fname, time32_t tm, invt_entry_t *buf, size_t bufsz );
 
 int
-get_invtrecord( int fd, void *buf, size_t bufsz, off64_t off, int, bool_t dolock );
+get_invtrecord( int fd, void *buf, size_t bufsz, off64_t off, bool_t dolock );
 
 int
-put_invtrecord( int fd, void *buf, size_t bufsz, off64_t off, int, bool_t dolock );
+put_invtrecord( int fd, void *buf, size_t bufsz, off64_t off, bool_t dolock );
 
 inv_idbtoken_t
 get_token( int fd, int objfd );
diff --git a/inventory/testmain.c b/inventory/testmain.c
index ecddf54..05e3c02 100644
--- a/inventory/testmain.c
+++ b/inventory/testmain.c
@@ -89,6 +89,7 @@
 recons_test( int howmany )
 {
 	int fd, i, rval = 1;
+	off64_t off = 0;
 	
 	ses sarr[ SESLIM];
 	
@@ -96,14 +97,16 @@
 	
 	for ( i=0; i<howmany && i < SESLIM; i++ ){
 		rval = get_invtrecord( fd, &sarr[i], 
-				       sizeof( uuid_t ) + sizeof( size_t ), 0,
-				       SEEK_CUR, BOOL_FALSE );
+				       sizeof( uuid_t ) + sizeof( size_t ), off,
+				       BOOL_FALSE );
 		assert( rval > 0 );
 		assert( sarr[i].sz > 0 );
 		sarr[i].buf = calloc( 1,  sarr[i].sz );
-		rval = get_invtrecord( fd, sarr[i].buf,  sarr[i].sz, 0, SEEK_CUR,
+		off += (off64_t)(sizeof(uuid_t) + sizeof(size_t));
+		rval = get_invtrecord( fd, sarr[i].buf,  sarr[i].sz, off,
 				       BOOL_FALSE );
 		assert( rval > 0 );
+		off += sarr[i].sz;
 	}
 	
 	
@@ -132,8 +135,7 @@
 	fd = open( "moids", O_RDONLY );
 	if ( fd < 0 ) return -1;
 	
-	get_invtrecord( fd, &moid, sizeof(uuid_t), (n-1)* sizeof( uuid_t),
-		        SEEK_SET, 0 );
+	get_invtrecord( fd, &moid, sizeof(uuid_t), (n-1)* sizeof( uuid_t), 0 );
 	uuid_to_string( &moid, &str, &stat );
 	printf("Searching for Moid = %s\n", str );
 	free( str );
@@ -263,7 +265,11 @@
 	char strbuf[128];
 	void *bufp;
 	size_t sz;
+#ifdef RECONS
 	int rfd;
+	off64_t off;
+	struct stat64 statbuf;
+#endif
 
 #ifdef FIRSTTIME
 	printf("first time!\n");
@@ -285,6 +291,11 @@
 #ifdef RECONS
 	rfd = open( sesfile, O_RDWR | O_CREAT );
 	fchmod( rfd, INV_PERMS );
+	if (fstat64(fd, &statbuf) < 0) {
+		perror("fstat64 session file");
+		return -1;
+	}
+	off = (off64_t)statbuf.st_size;
 #endif
 
 	for ( i = 0; i < nsess; i++ ) {
@@ -325,12 +336,13 @@
 	
 #ifdef RECONS
 		if (inv_get_sessioninfo( tok2, &bufp, &sz ) == BOOL_TRUE ) {
-		      put_invtrecord( rfd, fsidp, sizeof( uuid_t ), 0, 
-				        SEEK_END, BOOL_FALSE );
-			
-			put_invtrecord( rfd, &sz, sizeof( size_t ), 0,
-				        SEEK_END, BOOL_FALSE); 
-			put_invtrecord( rfd, bufp, sz, 0, SEEK_END, BOOL_FALSE );
+			put_invtrecord( rfd, fsidp, sizeof( uuid_t ), off,
+					BOOL_FALSE );
+			off += (off64_t)sizeof(uuid_t);
+			put_invtrecord( rfd, &sz, sizeof( size_t ), off,
+					BOOL_FALSE);
+			off += (off64_t)sizeof(size_t);
+			put_invtrecord( rfd, bufp, sz, off, BOOL_FALSE );
 		}
 #endif
 #ifdef NOTDEF