blob: dacf8dc3843a6848780ae13db3c68b94faadf457 [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
/*
* Copyright (C) 2018 HUAWEI, Inc.
* http://www.huawei.com/
* Created by Li Guifu <bluce.liguifu@huawei.com>
*/
#ifndef _LARGEFILE64_SOURCE
#define _LARGEFILE64_SOURCE
#endif
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include "erofs/internal.h"
#ifdef HAVE_LINUX_FS_H
#include <linux/fs.h>
#endif
#ifdef HAVE_LINUX_FALLOC_H
#include <linux/falloc.h>
#endif
#ifdef HAVE_SYS_STATFS_H
#include <sys/statfs.h>
#endif
#define EROFS_MODNAME "erofs_io"
#include "erofs/print.h"
/*
 * Retrieve file status for a virtual file.
 *
 * In dry-run mode nothing is queried: only st_size (0) and st_mode
 * (S_IFREG | 0777) of @buf are filled in.  Otherwise the request is handed
 * to the vfile's own ops when they are installed, or to fstat(2) on vf->fd.
 *
 * Returns whatever the selected backend returns (0 in dry-run mode).
 */
int erofs_io_fstat(struct erofs_vfile *vf, struct stat *buf)
{
	if (__erofs_unlikely(cfg.c_dry_run)) {
		buf->st_mode = S_IFREG | 0777;
		buf->st_size = 0;
		return 0;
	}

	return vf->ops ? vf->ops->fstat(vf, buf) : fstat(vf->fd, buf);
}
/*
 * Write @len bytes from @buf to a virtual file at position @pos.
 *
 * Dry-run mode writes nothing and returns 0.  When the vfile has its own
 * ops the call is forwarded to them unchanged.  Otherwise vf->offset is
 * added to @pos and the data is written to vf->fd, retrying after short
 * writes and EINTR until everything is written or the kernel reports a
 * zero-length write.
 *
 * Returns the number of bytes written (possibly short if a write returned
 * 0), or a negative errno value on failure.
 */
ssize_t erofs_io_pwrite(struct erofs_vfile *vf, const void *buf,
			u64 pos, size_t len)
{
	const char *src = buf;
	size_t written = 0;

	if (__erofs_unlikely(cfg.c_dry_run))
		return 0;

	if (vf->ops)
		return vf->ops->pwrite(vf, buf, pos, len);

	pos += vf->offset;
	do {
		ssize_t ret;

		/*
		 * Only ask for what is still outstanding: retrying with the
		 * full length after a short write would run past the end of
		 * the caller's buffer.
		 */
#ifdef HAVE_PWRITE64
		ret = pwrite64(vf->fd, src + written, len - written,
			       (off64_t)(pos + written));
#else
		ret = pwrite(vf->fd, src + written, len - written,
			     (off_t)(pos + written));
#endif
		if (ret < 0) {
			/* capture errno first; logging may overwrite it */
			int err = errno;

			if (err == EINTR)
				continue;
			erofs_err("failed to write: %s", strerror(err));
			return -err;
		}
		if (!ret)
			break;
		written += ret;
	} while (written < len);

	return written;
}
/*
 * Flush a virtual file to stable storage.
 *
 * Dry-run mode is a no-op.  A vfile with its own ops delegates to them;
 * otherwise fsync(2) is issued on vf->fd.
 *
 * Returns 0 on success or a negative errno value on failure.
 */
int erofs_io_fsync(struct erofs_vfile *vf)
{
	if (__erofs_unlikely(cfg.c_dry_run))
		return 0;

	if (vf->ops)
		return vf->ops->fsync(vf);

	if (fsync(vf->fd)) {
		/* capture errno first; logging may overwrite it */
		int err = errno;

		erofs_err("failed to fsync(!): %s", strerror(err));
		return -err;
	}
	return 0;
}
/*
 * Zero out (or release) @len bytes of a virtual file starting at @offset.
 *
 * Dry-run mode is a no-op and a vfile with its own ops delegates to them.
 * Otherwise, when @zeroout is false and hole punching is available, the
 * range is punched out of vf->fd (at @offset + vf->offset) while keeping
 * the file size.  If that is not requested, not available, or fails, the
 * range is overwritten with zeroes in chunks of at most
 * EROFS_MAX_BLOCK_SIZE bytes through erofs_io_pwrite().
 *
 * Returns 0 on success, a negative errno value if a write fails, or -EIO
 * if a write comes back short.
 */
ssize_t erofs_io_fallocate(struct erofs_vfile *vf, u64 offset,
			   size_t len, bool zeroout)
{
	static const char zero[EROFS_MAX_BLOCK_SIZE] = {0};

	if (__erofs_unlikely(cfg.c_dry_run))
		return 0;

	if (vf->ops)
		return vf->ops->fallocate(vf, offset, len, zeroout);

#if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE)
	if (!zeroout && fallocate(vf->fd, FALLOC_FL_PUNCH_HOLE |
		    FALLOC_FL_KEEP_SIZE, offset + vf->offset, len) >= 0)
		return 0;
#endif
	while (len) {
		size_t chunk = len > sizeof(zero) ? sizeof(zero) : len;
		ssize_t ret = erofs_io_pwrite(vf, zero, offset, chunk);

		if (ret < 0)
			return ret;
		/*
		 * A short result means the underlying write stopped making
		 * progress; bail out instead of retrying forever.
		 */
		if ((size_t)ret != chunk)
			return -EIO;
		len -= chunk;
		offset += chunk;
	}
	return 0;
}
/*
 * Set the size of a virtual file to @length bytes.
 *
 * Dry-run mode is a no-op and a vfile with its own ops delegates to them.
 * Otherwise vf->offset is added to @length; nothing is done for block
 * devices or when the file already has exactly that size.
 *
 * Returns 0 on success or a negative errno value on failure.
 */
int erofs_io_ftruncate(struct erofs_vfile *vf, u64 length)
{
	struct stat st;

	if (__erofs_unlikely(cfg.c_dry_run))
		return 0;

	if (vf->ops)
		return vf->ops->ftruncate(vf, length);

	if (fstat(vf->fd, &st)) {
		/* capture errno first; logging may overwrite it */
		int err = errno;

		erofs_err("failed to fstat: %s", strerror(err));
		return -err;
	}

	length += vf->offset;
	if (S_ISBLK(st.st_mode) || st.st_size == length)
		return 0;

	/* report the real cause rather than ftruncate(2)'s bare -1 */
	if (ftruncate(vf->fd, length) < 0)
		return -errno;
	return 0;
}
/*
 * Read up to @len bytes from a virtual file at position @pos into @buf.
 *
 * Dry-run mode reads nothing and returns 0.  When the vfile has its own
 * ops the call is forwarded to them unchanged.  Otherwise vf->offset is
 * added to @pos and the data is read from vf->fd, retrying after short
 * reads and EINTR until @len bytes were read or a read returns 0.
 *
 * Returns the number of bytes read (short when a read returned 0), or a
 * negative errno value on failure.
 */
ssize_t erofs_io_pread(struct erofs_vfile *vf, void *buf, u64 pos, size_t len)
{
	char *dst = buf;
	size_t nread = 0;

	if (__erofs_unlikely(cfg.c_dry_run))
		return 0;

	if (vf->ops)
		return vf->ops->pread(vf, buf, pos, len);

	pos += vf->offset;
	do {
		ssize_t ret;

		/*
		 * Only ask for what is still missing: retrying with the full
		 * length after a short read would overflow the caller's
		 * buffer.
		 */
#ifdef HAVE_PREAD64
		ret = pread64(vf->fd, dst + nread, len - nread,
			      (off64_t)(pos + nread));
#else
		ret = pread(vf->fd, dst + nread, len - nread,
			    (off_t)(pos + nread));
#endif
		if (ret < 0) {
			/* capture errno first; logging may overwrite it */
			int err = errno;

			if (err == EINTR)
				continue;
			erofs_err("failed to read: %s", strerror(err));
			return -err;
		}
		if (!ret)
			break;
		nread += ret;
	} while (nread < len);

	return nread;
}
/*
 * Query the size of the block device behind @fd and store it in @bytes.
 *
 * BLKGETSIZE64 is tried first when the platform defines it; BLKGETSIZE is
 * the fallback, whose result is scaled by 512 (<< 9) before being stored.
 *
 * Returns 0 on success.  Otherwise returns -errno, where errno is either
 * what the last failing ioctl left behind or the ENOTSUP preset below when
 * neither ioctl is compiled in.
 */
static int erofs_get_bdev_size(int fd, u64 *bytes)
{
#ifdef BLKGETSIZE
	unsigned long nsect;
#endif

	/* default error if no ioctl is available on this platform */
	errno = ENOTSUP;

#ifdef BLKGETSIZE64
	if (ioctl(fd, BLKGETSIZE64, bytes) >= 0)
		return 0;
#endif

#ifdef BLKGETSIZE
	if (ioctl(fd, BLKGETSIZE, &nsect) >= 0) {
		*bytes = (u64)nsect << 9;
		return 0;
	}
#endif

	return -errno;
}
/* Fallback definition for Linux builds whose headers lack BLKDISCARD. */
#if defined(__linux__) && !defined(BLKDISCARD)
#define BLKDISCARD _IO(0x12, 119)
#endif

/*
 * Issue a BLKDISCARD ioctl on @fd with the pair { @block, @count } passed
 * through unchanged.
 *
 * NOTE(review): the Linux ioctl takes a byte offset and byte length; the
 * caller in this file passes 0 and the device size - confirm the parameter
 * names are not meant as block units.
 *
 * Returns 0 on success, a negative errno value if the ioctl fails, or
 * -EOPNOTSUPP when BLKDISCARD is not available at build time.
 */
static int erofs_bdev_discard(int fd, u64 block, u64 count)
{
#ifdef BLKDISCARD
	u64 range[2] = { block, count };

	/*
	 * ioctl(2) only reports -1; translate it so that callers can feed
	 * the result to erofs_strerror() like every other error in here.
	 */
	if (ioctl(fd, BLKDISCARD, &range) < 0)
		return -errno;
	return 0;
#else
	return -EOPNOTSUPP;
#endif
}
/*
 * Open the image/device @dev and attach it to @sbi.
 *
 * @flags: open(2)-style flags.  Only the access mode and O_TRUNC are
 *         inspected: a read-only access mode opens with O_RDONLY, anything
 *         else with O_RDWR | O_CREAT (mode 0644).
 *
 * When opened writable with O_TRUNC the target is prepared for a fresh
 * image:
 *  - block device: its size is stored in sbi->devsz (rounded down to the
 *    filesystem block size) and a discard of the whole range is attempted;
 *    a discard failure is only logged;
 *  - regular file: a non-empty file is emptied (see the workaround comment
 *    below) and sbi->devblksz is taken from st_blksize;
 *  - anything else is rejected with -EINVAL.
 *
 * On success sbi->devname holds a heap copy of @dev, sbi->bdev.fd holds the
 * descriptor and 0 is returned.  On failure the descriptor is closed and a
 * negative errno value is returned.
 */
int erofs_dev_open(struct erofs_sb_info *sbi, const char *dev, int flags)
{
	bool ro = (flags & O_ACCMODE) == O_RDONLY;
	bool trunc = flags & O_TRUNC;
	struct stat st;
	int fd, ret;
#if defined(HAVE_SYS_STATFS_H) && defined(HAVE_FSTATFS)
	bool again = false;

repeat:
#endif
	fd = open(dev, (ro ? O_RDONLY : O_RDWR | O_CREAT) | O_BINARY, 0644);
	if (fd < 0) {
		/* capture errno first; logging may overwrite it */
		ret = -errno;
		erofs_err("failed to open %s: %s", dev, strerror(-ret));
		return ret;
	}

	if (ro || !trunc)
		goto out;

	if (fstat(fd, &st)) {
		/* capture errno before erofs_err()/close() can clobber it */
		ret = -errno;
		erofs_err("failed to fstat(%s): %s", dev, strerror(-ret));
		close(fd);
		return ret;
	}

	switch (st.st_mode & S_IFMT) {
	case S_IFBLK:
		ret = erofs_get_bdev_size(fd, &sbi->devsz);
		if (ret) {
			erofs_err("failed to get block device size(%s): %s",
				  dev, strerror(-ret));
			close(fd);
			return ret;
		}
		sbi->devsz = round_down(sbi->devsz, erofs_blksiz(sbi));
		/* best effort: a failed discard does not abort the open */
		ret = erofs_bdev_discard(fd, 0, sbi->devsz);
		if (ret)
			erofs_err("failed to erase block device(%s): %s",
				  dev, erofs_strerror(ret));
		break;
	case S_IFREG:
		if (st.st_size) {
#if defined(HAVE_SYS_STATFS_H) && defined(HAVE_FSTATFS)
			struct statfs stfs;

			/* still non-empty after unlink + recreate: give up */
			if (again) {
				close(fd);
				return -ENOTEMPTY;
			}

			/*
			 * fses like EXT4 and BTRFS will flush dirty blocks
			 * after truncate(0) even after the writeback happens
			 * (see kernel commit 7d8f9f7d150d and ccd2506bd431),
			 * which is NOT our intention.  Let's work around this
			 * by unlinking the file and creating it anew.
			 */
			if (!fstatfs(fd, &stfs) && (stfs.f_type == 0xEF53 ||
					stfs.f_type == 0x9123683E)) {
				close(fd);
				unlink(dev);
				again = true;
				goto repeat;
			}
#endif
			if (ftruncate(fd, 0)) {
				/* capture errno before it can be clobbered */
				ret = -errno;
				erofs_err("failed to ftruncate(%s).", dev);
				close(fd);
				return ret;
			}
		}
		sbi->devblksz = st.st_blksize;
		break;
	default:
		erofs_err("bad file type (%s, %o).", dev, st.st_mode);
		close(fd);
		return -EINVAL;
	}

out:
	sbi->devname = strdup(dev);
	if (!sbi->devname) {
		close(fd);
		return -ENOMEM;
	}
	sbi->bdev.fd = fd;
	erofs_info("successfully to open %s", dev);
	return 0;
}
/*
 * Detach the primary device from @sbi.
 *
 * The descriptor is closed only when no custom ops are installed on
 * sbi->bdev.  The stored device name is freed, and both fields are reset
 * (devname to NULL, fd to -1).
 */
void erofs_dev_close(struct erofs_sb_info *sbi)
{
	if (sbi->bdev.ops == NULL)
		close(sbi->bdev.fd);
	sbi->bdev.fd = -1;

	free(sbi->devname);
	sbi->devname = NULL;
}
/*
 * Close every blob descriptor recorded in @sbi (in index order) and reset
 * the blob count to zero.
 */
void erofs_blob_closeall(struct erofs_sb_info *sbi)
{
	unsigned int idx = 0;

	while (idx < sbi->nblobs) {
		close(sbi->blobfd[idx]);
		idx++;
	}
	sbi->nblobs = 0;
}
/*
 * Open @dev read-only and append its descriptor to sbi->blobfd[], bumping
 * sbi->nblobs.
 *
 * NOTE(review): the capacity of sbi->blobfd[] is not visible here and is
 * not checked - confirm callers bound the number of blobs.
 *
 * Returns 0 on success or a negative errno value if open(2) fails.
 */
int erofs_blob_open_ro(struct erofs_sb_info *sbi, const char *dev)
{
	int fd = open(dev, O_RDONLY | O_BINARY);

	if (fd < 0) {
		/* capture errno first; logging may overwrite it */
		int err = errno;

		erofs_err("failed to open(%s).", dev);
		return -err;
	}

	sbi->blobfd[sbi->nblobs] = fd;
	erofs_info("successfully to open blob%u %s", sbi->nblobs, dev);
	++sbi->nblobs;
	return 0;
}
/*
 * Read @len bytes at @offset from one of the devices of @sbi into @buf.
 *
 * @device_id 0 selects the primary device (sbi->bdev); a non-zero id N
 * selects the blob descriptor sbi->blobfd[N - 1] and is rejected with -EIO
 * when it exceeds sbi->nblobs.
 *
 * A read that comes back short is not an error: the remainder of @buf is
 * filled with zeroes.
 *
 * Returns 0 on success or a negative errno value on failure.
 */
ssize_t erofs_dev_read(struct erofs_sb_info *sbi, int device_id,
		       void *buf, u64 offset, size_t len)
{
	ssize_t got;

	if (!device_id) {
		got = erofs_io_pread(&sbi->bdev, buf, offset, len);
	} else if (device_id > sbi->nblobs) {
		erofs_err("invalid device id %d", device_id);
		return -EIO;
	} else {
		/* wrap the bare blob descriptor in a temporary vfile */
		struct erofs_vfile blob = {
			.fd = sbi->blobfd[device_id - 1],
		};

		got = erofs_io_pread(&blob, buf, offset, len);
	}

	if (got < 0)
		return got;

	if (got < len) {
		erofs_info("reach EOF of device @ %llu, pading with zeroes",
			   offset | 0ULL);
		memset(buf + got, 0, len - got);
	}
	return 0;
}
/*
 * Userspace fallback for copy_file_range(): copy up to @length bytes from
 * @fd_in at *@off_in to @fd_out at *@off_out through a stack buffer.
 *
 * Both offsets are advanced by the amount actually copied.  If a write
 * fails in the middle of a chunk, *@off_in is wound back so that it matches
 * what has been written.
 *
 * Returns the number of bytes copied, which is short when the input ends
 * early or when an error occurs after some data was already copied.  If
 * nothing was copied and an error occurred, a negative errno value is
 * returned, matching the convention of erofs_copy_file_range().
 */
static ssize_t __erofs_copy_file_range(int fd_in, u64 *off_in,
				       int fd_out, u64 *off_out,
				       size_t length)
{
	size_t copied = 0;
	char buf[8192];

	/*
	 * Main copying loop.  The buffer size is arbitrary and is a
	 * trade-off between stack size consumption, cache usage, and
	 * amortization of system call overhead.
	 */
	while (length > 0) {
		size_t to_read = min_t(size_t, length, sizeof(buf));
		ssize_t read_count;
		char *end, *p;

#ifdef HAVE_PREAD64
		read_count = pread64(fd_in, buf, to_read, *off_in);
#else
		read_count = pread(fd_in, buf, to_read, *off_in);
#endif
		if (read_count == 0)
			/* End of file reached prematurely. */
			return copied;
		if (read_count < 0) {
			/* Report the number of bytes copied so far. */
			if (copied > 0)
				return copied;
			return -errno;
		}
		*off_in += read_count;

		/* Write the buffer part which was read to the destination. */
		end = buf + read_count;
		for (p = buf; p < end; ) {
			ssize_t write_count;

#ifdef HAVE_PWRITE64
			write_count = pwrite64(fd_out, p, end - p, *off_out);
#else
			write_count = pwrite(fd_out, p, end - p, *off_out);
#endif
			if (write_count < 0) {
				int err = errno;
				/*
				 * Adjust the input read position to match what
				 * we have written, so that the caller can pick
				 * up after the error.
				 */
				size_t written = p - buf;
				/*
				 * NB: This needs to be signed so that we can
				 * form the negative value below.
				 */
				ssize_t overread = read_count - written;

				*off_in -= overread;
				/* Report the number of bytes copied so far. */
				if (copied + written > 0)
					return copied + written;
				return -err;
			}
			p += write_count;
			*off_out += write_count;
		} /* Write loop. */

		copied += read_count;
		length -= read_count;
	}
	return copied;
}
/*
 * Copy up to @length bytes from @fd_in at *@off_in to @fd_out at *@off_out.
 *
 * When copy_file_range() is available it is tried first.  Its result is
 * final unless it fails with ENOSYS or EXDEV, in which case (as when the
 * call is not compiled in at all) the userspace fallback
 * __erofs_copy_file_range() does the work.
 *
 * On the copy_file_range() path both offsets are written back from the
 * values the call left behind, and a failure is reported as a negative
 * errno value.
 */
ssize_t erofs_copy_file_range(int fd_in, u64 *off_in, int fd_out, u64 *off_out,
			      size_t length)
{
#ifdef HAVE_COPY_FILE_RANGE
	off64_t pos_in = *off_in, pos_out = *off_out;
	ssize_t n = copy_file_range(fd_in, &pos_in, fd_out, &pos_out,
				    length, 0);

	/* anything but "unsupported here" is reported as is */
	if (n >= 0 || (errno != ENOSYS && errno != EXDEV)) {
		if (n < 0)
			n = -errno;
		*off_in = pos_in;
		*off_out = pos_out;
		return n;
	}
#endif
	return __erofs_copy_file_range(fd_in, off_in, fd_out, off_out, length);
}
/*
 * Read up to @bytes bytes from the current position of a virtual file
 * into @buf.
 *
 * A vfile with its own ops delegates to them.  Otherwise read(2) is called
 * on vf->fd in chunks of at most INT_MAX bytes until @bytes bytes were read
 * or end of file is reached; reads interrupted by a signal are retried.
 *
 * Returns the number of bytes read (short at end of file) or a negative
 * errno value on failure.
 */
ssize_t erofs_io_read(struct erofs_vfile *vf, void *buf, size_t bytes)
{
	char *dst = buf;
	ssize_t total = 0;

	if (vf->ops)
		return vf->ops->read(vf, buf, bytes);

	while (bytes) {
		size_t chunk = bytes > INT_MAX ? INT_MAX : bytes;
		ssize_t ret = read(vf->fd, dst + total, chunk);

		if (ret < 0) {
			/* capture errno first; logging may overwrite it */
			int err = errno;

			/*
			 * Retry without touching the counters: folding the
			 * -1 into them would grow @bytes and move the
			 * destination backwards.
			 */
			if (err == EINTR)
				continue;
			erofs_err("failed to read : %s", strerror(err));
			return -err;
		}
		if (!ret)
			break;
		bytes -= ret;
		total += ret;
	}
	return total;
}
#ifdef HAVE_SYS_SENDFILE_H
#include <sys/sendfile.h>
#endif
/*
 * Reposition the file offset of a virtual file.
 *
 * Plain vfiles (no ops installed) go straight to lseek(2) on vf->fd;
 * otherwise the vfile's own lseek op handles the request.  The backend's
 * return value is passed through.
 */
off_t erofs_io_lseek(struct erofs_vfile *vf, u64 offset, int whence)
{
	if (!vf->ops)
		return lseek(vf->fd, offset, whence);

	return vf->ops->lseek(vf, offset, whence);
}
/*
 * Copy @len bytes from the current position of @vin to @vout at @pos.
 *
 * If @vout has its own ops the whole request is delegated to them.
 * Otherwise, when @vin is a plain descriptor, in-kernel copying is tried
 * first: copy_file_range() if available, then sendfile() (which needs the
 * output descriptor to be seeked, so it is skipped when @noseek is set).
 * Whatever is still outstanding afterwards is moved through a stack buffer
 * with erofs_io_read() and erofs_io_pwrite().
 *
 * Returns 0 once everything has been copied, a negative errno value if a
 * read or write fails, or -EIO if the input ends before @len bytes were
 * copied or a write comes back short.
 *
 * NOTE(review): erofs_io_pwrite() reports 0 bytes in dry-run mode, which
 * now surfaces here as -EIO instead of an endless loop - confirm whether
 * this path is reachable with dry-run enabled.
 */
int erofs_io_xcopy(struct erofs_vfile *vout, off_t pos,
		   struct erofs_vfile *vin, unsigned int len, bool noseek)
{
	if (vout->ops)
		return vout->ops->xcopy(vout, pos, vin, len, noseek);

	if (len && !vin->ops) {
		off_t ret __maybe_unused;

#ifdef HAVE_COPY_FILE_RANGE
		/* copy_file_range() advances pos itself on success */
		ret = copy_file_range(vin->fd, NULL, vout->fd, &pos, len, 0);
		if (ret > 0)
			len -= ret;
#endif
#if defined(HAVE_SYS_SENDFILE_H) && defined(HAVE_SENDFILE)
		if (len && !noseek) {
			ret = lseek(vout->fd, pos, SEEK_SET);
			if (ret == pos) {
				ret = sendfile(vout->fd, vin->fd, NULL, len);
				if (ret > 0) {
					pos += ret;
					len -= ret;
				}
			}
		}
#endif
	}

	while (len) {
		char buf[32768];
		unsigned int chunk = min_t(unsigned int, len, sizeof(buf));
		ssize_t nread, nwritten;

		nread = erofs_io_read(vin, buf, chunk);
		if (nread < 0)
			return nread;
		/* input exhausted early: fail instead of spinning forever */
		if (!nread)
			return -EIO;

		nwritten = erofs_io_pwrite(vout, buf, pos, nread);
		if (nwritten < 0)
			return nwritten;
		/* a short write would silently drop the rest of the chunk */
		if (nwritten != nread)
			return -EIO;

		pos += nwritten;
		len -= nwritten;
	}
	return 0;
}