| // SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 |
| /* |
| * Copyright (C), 2022, Coolpad Group Limited. |
| * Created by Yue Hu <huyue2@coolpad.com> |
| */ |
| #ifndef _LARGEFILE_SOURCE |
| #define _LARGEFILE_SOURCE |
| #endif |
| #ifndef _LARGEFILE64_SOURCE |
| #define _LARGEFILE64_SOURCE |
| #endif |
| #ifndef _FILE_OFFSET_BITS |
| #define _FILE_OFFSET_BITS 64 |
| #endif |
| #ifndef _GNU_SOURCE |
| #define _GNU_SOURCE |
| #endif |
| #include <stdlib.h> |
| #include <unistd.h> |
| #include <sys/mman.h> |
| #include "erofs/err.h" |
| #include "erofs/inode.h" |
| #include "erofs/compress.h" |
| #include "erofs/print.h" |
| #include "erofs/internal.h" |
| #include "erofs/fragments.h" |
| |
| struct erofs_fragment_dedupe_item { |
| struct list_head list; |
| unsigned int length; |
| erofs_off_t pos; |
| u8 data[]; |
| }; |
| |
| #define EROFS_TOF_HASHLEN 16 |
| |
| #define FRAGMENT_HASHSIZE 65536 |
| #define FRAGMENT_HASH(c) ((c) & (FRAGMENT_HASHSIZE - 1)) |
| |
| static struct list_head dupli_frags[FRAGMENT_HASHSIZE]; |
| static FILE *packedfile; |
| const char *erofs_frags_packedname = "packed_file"; |
| |
| #ifndef HAVE_LSEEK64 |
| #define erofs_lseek64 lseek |
| #else |
| #define erofs_lseek64 lseek64 |
| #endif |
| |
| static int z_erofs_fragments_dedupe_find(struct erofs_inode *inode, int fd, |
| u32 crc) |
| { |
| struct erofs_fragment_dedupe_item *cur, *di = NULL; |
| struct list_head *head; |
| u8 *data; |
| unsigned int length, e2, deduped; |
| erofs_off_t pos; |
| int ret; |
| |
| head = &dupli_frags[FRAGMENT_HASH(crc)]; |
| if (list_empty(head)) |
| return 0; |
| |
| /* XXX: no need to read so much for smaller? */ |
| if (inode->i_size < EROFS_CONFIG_COMPR_MAX_SZ) |
| length = inode->i_size; |
| else |
| length = EROFS_CONFIG_COMPR_MAX_SZ; |
| |
| data = malloc(length); |
| if (!data) |
| return -ENOMEM; |
| |
| if (erofs_lseek64(fd, inode->i_size - length, SEEK_SET) < 0) { |
| ret = -errno; |
| goto out; |
| } |
| |
| ret = read(fd, data, length); |
| if (ret != length) { |
| ret = -errno; |
| goto out; |
| } |
| |
| DBG_BUGON(length <= EROFS_TOF_HASHLEN); |
| e2 = length - EROFS_TOF_HASHLEN; |
| deduped = 0; |
| |
| list_for_each_entry(cur, head, list) { |
| unsigned int e1, mn, i = 0; |
| |
| DBG_BUGON(cur->length <= EROFS_TOF_HASHLEN); |
| e1 = cur->length - EROFS_TOF_HASHLEN; |
| |
| if (memcmp(cur->data + e1, data + e2, EROFS_TOF_HASHLEN)) |
| continue; |
| |
| mn = min(e1, e2); |
| while (i < mn && cur->data[e1 - i - 1] == data[e2 - i - 1]) |
| ++i; |
| |
| if (!di || i + EROFS_TOF_HASHLEN > deduped) { |
| deduped = i + EROFS_TOF_HASHLEN; |
| di = cur; |
| |
| /* full match */ |
| if (i == e2) |
| break; |
| } |
| } |
| if (!di) |
| goto out; |
| |
| DBG_BUGON(di->length < deduped); |
| pos = di->pos + di->length - deduped; |
| /* let's read more to dedupe as long as we can */ |
| if (deduped == di->length) { |
| fflush(packedfile); |
| |
| while(deduped < inode->i_size && pos) { |
| char buf[2][16384]; |
| unsigned int sz = min_t(unsigned int, pos, |
| sizeof(buf[0])); |
| |
| if (pread(fileno(packedfile), buf[0], sz, |
| pos - sz) != sz) |
| break; |
| if (pread(fd, buf[1], sz, |
| inode->i_size - deduped - sz) != sz) |
| break; |
| |
| if (memcmp(buf[0], buf[1], sz)) |
| break; |
| pos -= sz; |
| deduped += sz; |
| } |
| } |
| inode->fragment_size = deduped; |
| inode->fragmentoff = pos; |
| |
| erofs_dbg("Dedupe %llu tail data at %llu", inode->fragment_size | 0ULL, |
| inode->fragmentoff | 0ULL); |
| out: |
| free(data); |
| return ret; |
| } |
| |
| int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 *tofcrc) |
| { |
| u8 data_to_hash[EROFS_TOF_HASHLEN]; |
| int ret; |
| |
| if (inode->i_size <= EROFS_TOF_HASHLEN) |
| return 0; |
| |
| if (erofs_lseek64(fd, inode->i_size - EROFS_TOF_HASHLEN, SEEK_SET) < 0) |
| return -errno; |
| |
| ret = read(fd, data_to_hash, EROFS_TOF_HASHLEN); |
| if (ret != EROFS_TOF_HASHLEN) |
| return -errno; |
| |
| *tofcrc = erofs_crc32c(~0, data_to_hash, EROFS_TOF_HASHLEN); |
| ret = z_erofs_fragments_dedupe_find(inode, fd, *tofcrc); |
| if (ret < 0) |
| return ret; |
| ret = lseek(fd, 0, SEEK_SET); |
| if (ret < 0) |
| return -errno; |
| return 0; |
| } |
| |
| static int z_erofs_fragments_dedupe_insert(void *data, unsigned int len, |
| erofs_off_t pos, u32 crc) |
| { |
| struct erofs_fragment_dedupe_item *di; |
| |
| if (len <= EROFS_TOF_HASHLEN) |
| return 0; |
| if (len > EROFS_CONFIG_COMPR_MAX_SZ) { |
| data += len - EROFS_CONFIG_COMPR_MAX_SZ; |
| pos += len - EROFS_CONFIG_COMPR_MAX_SZ; |
| len = EROFS_CONFIG_COMPR_MAX_SZ; |
| } |
| di = malloc(sizeof(*di) + len); |
| if (!di) |
| return -ENOMEM; |
| |
| memcpy(di->data, data, len); |
| di->length = len; |
| di->pos = pos; |
| |
| list_add_tail(&di->list, &dupli_frags[FRAGMENT_HASH(crc)]); |
| return 0; |
| } |
| |
| int z_erofs_fragments_init(void) |
| { |
| unsigned int i; |
| |
| for (i = 0; i < FRAGMENT_HASHSIZE; ++i) |
| init_list_head(&dupli_frags[i]); |
| return 0; |
| } |
| |
| void z_erofs_fragments_exit(void) |
| { |
| struct erofs_fragment_dedupe_item *di, *n; |
| struct list_head *head; |
| unsigned int i; |
| |
| for (i = 0; i < FRAGMENT_HASHSIZE; ++i) { |
| head = &dupli_frags[i]; |
| |
| list_for_each_entry_safe(di, n, head, list) |
| free(di); |
| } |
| } |
| |
| void z_erofs_fragments_commit(struct erofs_inode *inode) |
| { |
| if (!inode->fragment_size) |
| return; |
| /* |
| * If the packed inode is larger than 4GiB, the full fragmentoff |
| * will be recorded by switching to the noncompact layout anyway. |
| */ |
| if (inode->fragmentoff >> 32) |
| inode->datalayout = EROFS_INODE_COMPRESSED_FULL; |
| |
| inode->z_advise |= Z_EROFS_ADVISE_FRAGMENT_PCLUSTER; |
| erofs_sb_set_fragments(inode->sbi); |
| } |
| |
| int z_erofs_pack_file_from_fd(struct erofs_inode *inode, int fd, |
| u32 tofcrc) |
| { |
| #ifdef HAVE_FTELLO64 |
| off64_t offset = ftello64(packedfile); |
| #else |
| off_t offset = ftello(packedfile); |
| #endif |
| char *memblock; |
| int rc; |
| |
| if (offset < 0) |
| return -errno; |
| |
| inode->fragmentoff = (erofs_off_t)offset; |
| inode->fragment_size = inode->i_size; |
| |
| memblock = mmap(NULL, inode->i_size, PROT_READ, MAP_SHARED, fd, 0); |
| if (memblock == MAP_FAILED || !memblock) { |
| unsigned long long remaining = inode->fragment_size; |
| |
| memblock = NULL; |
| while (remaining) { |
| char buf[32768]; |
| unsigned int sz = min_t(unsigned int, remaining, |
| sizeof(buf)); |
| |
| rc = read(fd, buf, sz); |
| if (rc != sz) { |
| if (rc < 0) |
| rc = -errno; |
| else |
| rc = -EAGAIN; |
| goto out; |
| } |
| if (fwrite(buf, sz, 1, packedfile) != 1) { |
| rc = -EIO; |
| goto out; |
| } |
| remaining -= sz; |
| } |
| rc = lseek(fd, 0, SEEK_SET); |
| if (rc < 0) { |
| rc = -errno; |
| goto out; |
| } |
| } else if (fwrite(memblock, inode->fragment_size, 1, packedfile) != 1) { |
| rc = -EIO; |
| goto out; |
| } |
| |
| erofs_dbg("Recording %llu fragment data at %llu", |
| inode->fragment_size | 0ULL, inode->fragmentoff | 0ULL); |
| |
| if (memblock) |
| rc = z_erofs_fragments_dedupe_insert(memblock, |
| inode->fragment_size, inode->fragmentoff, tofcrc); |
| else |
| rc = 0; |
| out: |
| if (memblock) |
| munmap(memblock, inode->i_size); |
| return rc; |
| } |
| |
| int z_erofs_pack_fragments(struct erofs_inode *inode, void *data, |
| unsigned int len, u32 tofcrc) |
| { |
| #ifdef HAVE_FTELLO64 |
| off64_t offset = ftello64(packedfile); |
| #else |
| off_t offset = ftello(packedfile); |
| #endif |
| int ret; |
| |
| if (offset < 0) |
| return -errno; |
| |
| inode->fragmentoff = (erofs_off_t)offset; |
| inode->fragment_size = len; |
| |
| if (fwrite(data, len, 1, packedfile) != 1) |
| return -EIO; |
| |
| erofs_dbg("Recording %llu fragment data at %llu", |
| inode->fragment_size | 0ULL, inode->fragmentoff | 0ULL); |
| |
| ret = z_erofs_fragments_dedupe_insert(data, len, inode->fragmentoff, |
| tofcrc); |
| if (ret) |
| return ret; |
| return len; |
| } |
| |
| int erofs_flush_packed_inode(struct erofs_sb_info *sbi) |
| { |
| struct erofs_inode *inode; |
| |
| if (!erofs_sb_has_fragments(sbi)) |
| return -EINVAL; |
| fflush(packedfile); |
| if (!ftello(packedfile)) |
| return 0; |
| |
| inode = erofs_mkfs_build_special_from_fd(sbi, fileno(packedfile), |
| EROFS_PACKED_INODE); |
| sbi->packed_nid = erofs_lookupnid(inode); |
| erofs_iput(inode); |
| return 0; |
| } |
| |
| void erofs_packedfile_exit(void) |
| { |
| if (packedfile) |
| fclose(packedfile); |
| } |
| |
| FILE *erofs_packedfile_init(void) |
| { |
| #ifdef HAVE_TMPFILE64 |
| packedfile = tmpfile64(); |
| #else |
| packedfile = tmpfile(); |
| #endif |
| if (!packedfile) |
| return ERR_PTR(-ENOMEM); |
| return packedfile; |
| } |