| /* |
| * linux/fs/umsdos/mangle.c |
| * |
| * Written 1993 by Jacques Gelinas |
| * |
| * Control the mangling of file name to fit msdos name space. |
| * Many optimisations by GLU == dglaude@is1.vub.ac.be (Glaude David) |
| */ |
| |
| #include <linux/errno.h> |
| #include <linux/string.h> |
| #include <linux/kernel.h> |
| #include <linux/umsdos_fs.h> |
| |
| /* (This file is used outside of the kernel) */ |
| #ifndef __KERNEL__ |
| #define KERN_WARNING |
| #endif |
| |
| /* |
| * Complete the mangling of the MSDOS fake name |
| * based on the position of the entry in the EMD file. |
| * |
| * Simply complete the job of umsdos_parse; fill the extension. |
| * |
| * Beware that info->f_pos must be set. |
| */ |
| void umsdos_manglename (struct umsdos_info *info) |
| { |
| if (info->msdos_reject) { |
| /* #Specification: file name / non MSDOS conforming / mangling |
| * Each non MSDOS conforming file has a special extension |
| * build from the entry position in the EMD file. |
| * |
| * This number is then transform in a base 32 number, where |
| * each digit is expressed like hexadecimal number, using |
| * digit and letter, except it uses 22 letters from 'a' to 'v'. |
| * The number 32 comes from 2**5. It is faster to split a binary |
| * number using a base which is a power of two. And I was 32 |
| * when I started this project. Pick your answer :-) . |
| * |
| * If the result is '0', it is replace with '_', simply |
| * to make it odd. |
| * |
| * This is true for the first two character of the extension. |
| * The last one is taken from a list of odd character, which |
| * are: |
| * |
| * { } ( ) ! ` ^ & @ |
| * |
| * With this scheme, we can produce 9216 ( 9* 32 * 32) |
| * different extensions which should not clash with any useful |
| * extension already popular or meaningful. Since most directory |
| * have much less than 32 * 32 files in it, the first character |
| * of the extension of any mangled name will be {. |
| * |
| * Here are the reason to do this (this kind of mangling). |
| * |
| * -The mangling is deterministic. Just by the extension, we |
| * are able to locate the entry in the EMD file. |
| * |
| * -By keeping to beginning of the file name almost unchanged, |
| * we are helping the MSDOS user. |
| * |
| * -The mangling produces names not too ugly, so an msdos user |
| * may live with it (remember it, type it, etc...). |
| * |
| * -The mangling produces names ugly enough so no one will |
| * ever think of using such a name in real life. This is not |
| * fool proof. I don't think there is a total solution to this. |
| */ |
| int entry_num; |
| char *pt = info->fake.fname + info->fake.len; |
| /* lookup for encoding the last character of the extension |
| * It contains valid character after the ugly one to make sure |
| * even if someone overflows the 32 * 32 * 9 limit, it still |
| * does something |
| */ |
| #define SPECIAL_MANGLING '{','}','(',')','!','`','^','&','@' |
| static char lookup3[] = |
| { |
| SPECIAL_MANGLING, |
| /* This is the start of lookup12 */ |
| '_', '1', '2', '3', '4', '5', '6', '7', '8', '9', |
| 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', |
| 'p', 'q', 'r', 's', 't', 'u', 'v' |
| }; |
| |
| #define lookup12 (lookup3+9) |
| entry_num = info->f_pos / UMSDOS_REC_SIZE; |
| if (entry_num > (9* 32 * 32)){ |
| printk (KERN_WARNING "UMSDOS: more than 9216 files in a directory.\n" |
| "This may break the mangling strategy.\n" |
| "Not a killer problem. See doc.\n"); |
| } |
| *pt++ = '.'; |
| *pt++ = lookup3 [(entry_num >> 10) & 31]; |
| *pt++ = lookup12[(entry_num >> 5) & 31]; |
| *pt++ = lookup12[entry_num & 31]; |
| *pt = '\0'; /* help doing printk */ |
| info->fake.len += 4; |
| info->msdos_reject = 0; /* Avoid mangling twice */ |
| } |
| } |
| |
| /* |
| * Evaluate the record size needed to store of name of len character. |
| * The value returned is a multiple of UMSDOS_REC_SIZE. |
| */ |
| int umsdos_evalrecsize (int len) |
| { |
| struct umsdos_dirent dirent; |
| int nbrec = 1 + ((len - 1 + (dirent.name - (char *) &dirent)) |
| / UMSDOS_REC_SIZE); |
| |
| return nbrec * UMSDOS_REC_SIZE; |
| /* |
| * GLU This should be inlined or something to speed it up to the max. |
| * GLU nbrec is absolutely not needed to return the value. |
| */ |
| } |
#ifdef TEST
/*
 * Straightforward version of umsdos_evalrecsize(), only compiled for the
 * TEST program which checks that both versions agree: divide the
 * total size (fixed part of the entry + name) by the record size and
 * round up when there is a remainder.
 */
int umsdos_evalrecsize_old (int len)
{
	/* Never read: only used to measure where the name field starts */
	struct umsdos_dirent dirent;
	int total = len + (dirent.name - (char *) &dirent);
	int records = total / UMSDOS_REC_SIZE;

	/* A partially filled record still takes a whole record */
	if (total % UMSDOS_REC_SIZE > 0)
		records++;
	return records * UMSDOS_REC_SIZE;
}
#endif
| |
| |
| /* |
| * Fill the struct info with the full and msdos name of a file |
| * Return 0 if all is OK, a negative error code otherwise. |
| */ |
| int umsdos_parse ( |
| const char *fname, |
| int len, |
| struct umsdos_info *info) |
| { |
| int ret = -ENAMETOOLONG; |
| |
| /* #Specification: file name / too long |
| * If a file name exceed UMSDOS maxima, the file name is silently |
| * truncated. This makes it conformant with the other file system |
| * of Linux (minix and ext2 at least). |
| */ |
| if (len > UMSDOS_MAXNAME) |
| len = UMSDOS_MAXNAME; |
| { |
| const char *firstpt = NULL; /* First place we saw a "." in fname */ |
| |
| /* #Specification: file name / non MSDOS conforming / base length 0 |
| * file names beginning with a period '.' are invalid for MS-DOS. |
| * It needs absolutely a base name. So the file name is mangled |
| */ |
| int ivldchar = fname[0] == '.'; /* At least one invalid character */ |
| int msdos_len = len; |
| int base_len; |
| |
| /* |
| * cardinal_per_size tells if there exists at least one |
| * DOS pseudo device on length n. See the test below. |
| */ |
| static const char cardinal_per_size[9] = |
| { |
| 0, 0, 0, 1, 1, 0, 1, 0, 1 |
| }; |
| |
| /* |
| * lkp translate all character to acceptable character (for DOS). |
| * When lkp[n] == n, it means also it is an acceptable one. |
| * So it serves both as a flag and as a translator. |
| */ |
| static char lkp[256]; |
| static char is_init = 0; |
| |
| if (!is_init) { |
| /* |
| * Initialisation of the array is easier and less error |
| * prone like this. |
| */ |
| int i; |
| static const char *spc = "\"*+,/:;<=>?[\\]|~"; |
| |
| is_init = 1; |
| for (i = 0; i <= 32; i++) |
| lkp[i] = '#'; |
| for (i = 33; i < 'A'; i++) |
| lkp[i] = (char) i; |
| for (i = 'A'; i <= 'Z'; i++) |
| lkp[i] = (char) (i + ('a' - 'A')); |
| for (i = 'Z' + 1; i < 127; i++) |
| lkp[i] = (char) i; |
| for (i = 128; i < 256; i++) |
| lkp[i] = '#'; |
| |
| lkp['.'] = '_'; |
| while (*spc != '\0') |
| lkp[(unsigned char) (*spc++)] = '#'; |
| } |
| /* GLU |
| * File names longer than 8+'.'+3 are invalid for MS-DOS, |
| * so the file name is to be mangled--no further test is needed. |
| * This speeds up handling of long names. |
| * The position of the last point is no more necessary anyway. |
| */ |
| if (len <= (8 + 1 + 3)) { |
| const char *pt = fname; |
| const char *endpt = fname + len; |
| |
| while (pt < endpt) { |
| if (*pt == '.') { |
| if (firstpt != NULL) { |
| /* 2 . in a file name. Reject */ |
| ivldchar = 1; |
| break; |
| } else { |
| int extlen = (int) (endpt - pt); |
| |
| firstpt = pt; |
| if (firstpt - fname > 8) { |
| /* base name longer than 8: reject */ |
| ivldchar = 1; |
| break; |
| } else if (extlen > 4) { |
| /* Extension longer than 4 (including .): reject */ |
| ivldchar = 1; |
| break; |
| } else if (extlen == 1) { |
| /* #Specification: file name / non MSDOS conforming / last char == . |
| * If the last character of a file name is |
| * a period, mangling is applied. MS-DOS does |
| * not support those file names. |
| */ |
| ivldchar = 1; |
| break; |
| } else if (extlen == 4) { |
| /* #Specification: file name / non MSDOS conforming / mangling clash |
| * To avoid clash with the umsdos mangling, any file |
| * with a special character as the first character |
| * of the extension will be mangled. This solves the |
| * following problem: |
| * |
| * # |
| * touch FILE |
| * # FILE is invalid for DOS, so mangling is applied |
| * # file.{_1 is created in the DOS directory |
| * touch file.{_1 |
| * # To UMSDOS file point to a single DOS entry. |
| * # So file.{_1 has to be mangled. |
| * # |
| */ |
| static char special[] = |
| { |
| SPECIAL_MANGLING, '\0' |
| }; |
| |
| if (strchr (special, firstpt[1]) != NULL) { |
| ivldchar = 1; |
| break; |
| } |
| } |
| } |
| } else if (lkp[(unsigned char) (*pt)] != *pt) { |
| ivldchar = 1; |
| break; |
| } |
| pt++; |
| } |
| } else { |
| ivldchar = 1; |
| } |
| if (ivldchar |
| || (firstpt == NULL && len > 8) |
| || (len == UMSDOS_EMD_NAMELEN |
| && memcmp (fname, UMSDOS_EMD_FILE, UMSDOS_EMD_NAMELEN) == 0)) { |
| /* #Specification: file name / --linux-.--- |
| * The name of the EMD file --linux-.--- is map to a mangled |
| * name. So UMSDOS does not restrict its use. |
| */ |
| /* #Specification: file name / non MSDOS conforming / mangling |
| * Non MSDOS conforming file names must use some alias to fit |
| * in the MSDOS name space. |
| * |
| * The strategy is simple. The name is simply truncated to |
| * 8 char. points are replace with underscore and a |
| * number is given as an extension. This number correspond |
| * to the entry number in the EMD file. The EMD file |
| * only need to carry the real name. |
| * |
| * Upper case is also converted to lower case. |
| * Control character are converted to #. |
| * Spaces are converted to #. |
| * The following characters are also converted to #. |
| * # |
| * " * + , / : ; < = > ? [ \ ] | ~ |
| * # |
| * |
| * Sometimes the problem is not in MS-DOS itself but in |
| * command.com. |
| */ |
| int i; |
| char *pt = info->fake.fname; |
| |
| base_len = msdos_len = (msdos_len > 8) ? 8 : msdos_len; |
| /* |
| * There is no '.' any more so we know for a fact that |
| * the base length is the length. |
| */ |
| memcpy (info->fake.fname, fname, msdos_len); |
| for (i = 0; i < msdos_len; i++, pt++) |
| *pt = lkp[(unsigned char) (*pt)]; |
| *pt = '\0'; /* GLU We force null termination. */ |
| info->msdos_reject = 1; |
| /* |
| * The numeric extension is added only when we know |
| * the position in the EMD file, in umsdos_newentry(), |
| * umsdos_delentry(), and umsdos_findentry(). |
| * See umsdos_manglename(). |
| */ |
| } else { |
| /* Conforming MSDOS file name */ |
| strncpy (info->fake.fname, fname, len); |
| info->msdos_reject = 0; |
| base_len = firstpt != NULL ? (int) (firstpt - fname) : len; |
| } |
| if (cardinal_per_size[base_len]) { |
| /* #Specification: file name / MSDOS devices / mangling |
| * To avoid unreachable file from MS-DOS, any MS-DOS conforming |
| * file with a basename equal to one of the MS-DOS pseudo |
| * devices will be mangled. |
| * |
| * If a file such as "prn" was created, it would be unreachable |
| * under MS-DOS because "prn" is assumed to be the printer, even |
| * if the file does have an extension. |
| * |
| * Since the extension is unimportant to MS-DOS, we must patch |
| * the basename also. We simply insert a minus '-'. To avoid |
| * conflict with valid file with a minus in front (such as |
| * "-prn"), we add an mangled extension like any other |
| * mangled file name. |
| * |
| * Here is the list of DOS pseudo devices: |
| * |
| * # |
| * "prn","con","aux","nul", |
| * "lpt1","lpt2","lpt3","lpt4", |
| * "com1","com2","com3","com4", |
| * "clock$" |
| * # |
| * |
| * and some standard ones for common DOS programs |
| * |
| * "emmxxxx0","xmsxxxx0","setverxx" |
| * |
| * (Thanks to Chris Hall <cah17@phoenix.cambridge.ac.uk> |
| * for pointing these out to me). |
| * |
| * Is there one missing? |
| */ |
| /* This table must be ordered by length */ |
| static const char *tbdev[] = |
| { |
| "prn", "con", "aux", "nul", |
| "lpt1", "lpt2", "lpt3", "lpt4", |
| "com1", "com2", "com3", "com4", |
| "clock$", |
| "emmxxxx0", "xmsxxxx0", "setverxx" |
| }; |
| |
| /* Tell where to find in tbdev[], the first name of */ |
| /* a certain length */ |
| static const char start_ind_dev[9] = |
| { |
| 0, 0, 0, 4, 12, 12, 13, 13, 16 |
| }; |
| char basen[9]; |
| int i; |
| |
| for (i = start_ind_dev[base_len - 1]; i < start_ind_dev[base_len]; i++) { |
| if (memcmp (info->fake.fname, tbdev[i], base_len) == 0) { |
| memcpy (basen, info->fake.fname, base_len); |
| basen[base_len] = '\0'; /* GLU We force null termination. */ |
| /* |
| * GLU We do that only if necessary; we try to do the |
| * GLU simple thing in the usual circumstance. |
| */ |
| info->fake.fname[0] = '-'; |
| strcpy (info->fake.fname + 1, basen); /* GLU We already guaranteed a null would be at the end. */ |
| msdos_len = (base_len == 8) ? 8 : base_len + 1; |
| info->msdos_reject = 1; |
| break; |
| } |
| } |
| } |
| info->fake.fname[msdos_len] = '\0'; /* Help doing printk */ |
| /* GLU This zero should (always?) be there already. */ |
| info->fake.len = msdos_len; |
| /* Why not use info->fake.len everywhere? Is it longer? |
| */ |
| memcpy (info->entry.name, fname, len); |
| info->entry.name[len] = '\0'; /* for printk */ |
| info->entry.name_len = len; |
| ret = 0; |
| } |
| /* |
| * Evaluate how many records are needed to store this entry. |
| */ |
| info->recsize = umsdos_evalrecsize (len); |
| return ret; |
| } |
| |
| #ifdef TEST |
| |
/*
 * One test case for umsdos_parse().
 * The strings are only read, never modified (the table is filled with
 * string literals), hence the const pointers.
 */
struct MANG_TEST {
	const char *fname;	/* Name to validate */
	int msdos_reject;	/* Expected msdos_reject flag */
	const char *msname;	/* Expected msdos name */
};
| |
| struct MANG_TEST tb[] = |
| { |
| "hello", 0, "hello", |
| "hello.1", 0, "hello.1", |
| "hello.1_", 0, "hello.1_", |
| "prm", 0, "prm", |
| |
| #ifdef PROPOSITION |
| "HELLO", 1, "hello", |
| "Hello.1", 1, "hello.1", |
| "Hello.c", 1, "hello.c", |
| #else |
| /* |
| * I find the three examples below very unfortunate. I propose to |
| * convert them to lower case in a quick preliminary pass, then test |
| * whether there are other troublesome characters. I have not made |
| * this change, because it is not easy, but I wanted to mention the |
| * principle. Obviously something like that would increase the chance |
| * of collisions, for example between "HELLO" and "Hello", but these |
| * can be treated elsewhere along with the other collisions. |
| */ |
| |
| "HELLO", 1, "hello", |
| "Hello.1", 1, "hello_1", |
| "Hello.c", 1, "hello_c", |
| #endif |
| |
| "hello.{_1", 1, "hello_{_", |
| "hello\t", 1, "hello#", |
| "hello.1.1", 1, "hello_1_", |
| "hel,lo", 1, "hel#lo", |
| "Salut.Tu.vas.bien?", 1, "salut_tu", |
| ".profile", 1, "_profile", |
| ".xv", 1, "_xv", |
| "toto.", 1, "toto_", |
| "clock$.x", 1, "-clock$", |
| "emmxxxx0", 1, "-emmxxxx", |
| "emmxxxx0.abcd", 1, "-emmxxxx", |
| "aux", 1, "-aux", |
| "prn", 1, "-prn", |
| "prn.abc", 1, "-prn", |
| "PRN", 1, "-prn", |
| /* |
| * GLU WARNING: the results of these are different with my version |
| * GLU of mangling compared to the original one. |
| * GLU CAUSE: the manner of calculating the baselen variable. |
| * GLU For you they are always 3. |
| * GLU For me they are respectively 7, 8, and 8. |
| |
| */ |
| "PRN.abc", 1, "prn_abc", |
| "Prn.abcd", 1, "prn_abcd", |
| "prn.abcd", 1, "prn_abcd", |
| "Prn.abcdefghij", 1, "prn_abcd" |
| }; |
| |
| int main (int argc, char *argv[]) |
| { |
| int i, rold, rnew; |
| |
| printf ("Testing the umsdos_parse.\n"); |
| for (i = 0; i < sizeof (tb) / sizeof (tb[0]); i++) { |
| struct MANG_TEST *pttb = tb + i; |
| struct umsdos_info info; |
| int ok = umsdos_parse (pttb->fname, strlen (pttb->fname), &info); |
| |
| if (strcmp (info.fake.fname, pttb->msname) != 0) { |
| printf ("**** %s -> ", pttb->fname); |
| printf ("%s <> %s\n", info.fake.fname, pttb->msname); |
| } else if (info.msdos_reject != pttb->msdos_reject) { |
| printf ("**** %s -> %s ", pttb->fname, pttb->msname); |
| printf ("%d <> %d\n", info.msdos_reject, pttb->msdos_reject); |
| } else { |
| printf (" %s -> %s %d\n", pttb->fname, pttb->msname |
| ,pttb->msdos_reject); |
| } |
| } |
| printf ("Testing the new umsdos_evalrecsize."); |
| for (i = 0; i < UMSDOS_MAXNAME; i++) { |
| rnew = umsdos_evalrecsize (i); |
| rold = umsdos_evalrecsize_old (i); |
| if (!(i % UMSDOS_REC_SIZE)) { |
| printf ("\n%d:\t", i); |
| } |
| if (rnew != rold) { |
| printf ("**** %d newres: %d != %d \n", i, rnew, rold); |
| } else { |
| printf ("."); |
| } |
| } |
| printf ("\nEnd of Testing.\n"); |
| |
| return 0; |
| } |
| |
| #endif |