File: /usr/src/linux/fs/umsdos/mangle.c

1     /*
2      *  linux/fs/umsdos/mangle.c
3      *
4      *      Written 1993 by Jacques Gelinas 
5      *
6      * Control the mangling of file name to fit msdos name space.
7      * Many optimisations by GLU == dglaude@is1.vub.ac.be (Glaude David)
8      */
9     
10     #include <linux/errno.h>
11     #include <linux/string.h>
12     #include <linux/kernel.h>
13     #include <linux/umsdos_fs.h>
14     
15     /* (This file is used outside of the kernel) */
16     #ifndef __KERNEL__
17     #define KERN_WARNING
18     #endif
19     
20     /*
21      * Complete the mangling of the MSDOS fake name
22      * based on the position of the entry in the EMD file.
23      * 
24      * Simply complete the job of umsdos_parse; fill the extension.
25      * 
26      * Beware that info->f_pos must be set.
27      */
28     void umsdos_manglename (struct umsdos_info *info)
29     {
30     	if (info->msdos_reject) {
31     		/* #Specification: file name / non MSDOS conforming / mangling
32     		 * Each non MSDOS conforming file has a special extension
33     		 * build from the entry position in the EMD file.
34     		 * 
35     		 * This number is then transform in a base 32 number, where
36     		 * each digit is expressed like hexadecimal number, using
37     		 * digit and letter, except it uses 22 letters from 'a' to 'v'.
38     		 * The number 32 comes from 2**5. It is faster to split a binary
39     		 * number using a base which is a power of two. And I was 32
40     		 * when I started this project. Pick your answer :-) .
41     		 * 
42     		 * If the result is '0', it is replace with '_', simply
43     		 * to make it odd.
44     		 * 
45     		 * This is true for the first two character of the extension.
46     		 * The last one is taken from a list of odd character, which
47     		 * are:
48     		 * 
49     		 * { } ( ) ! ` ^ & @
50     		 * 
51     		 * With this scheme, we can produce 9216 ( 9* 32 * 32)
52     		 * different extensions which should not clash with any useful
53     		 * extension already popular or meaningful. Since most directory
54     		 * have much less than 32 * 32 files in it, the first character
55     		 * of the extension of any mangled name will be {.
56     		 * 
57     		 * Here are the reason to do this (this kind of mangling).
58     		 * 
59     		 * -The mangling is deterministic. Just by the extension, we
60     		 * are able to locate the entry in the EMD file.
61     		 * 
62     		 * -By keeping to beginning of the file name almost unchanged,
63     		 * we are helping the MSDOS user.
64     		 * 
65     		 * -The mangling produces names not too ugly, so an msdos user
66     		 * may live with it (remember it, type it, etc...).
67     		 * 
68     		 * -The mangling produces names ugly enough so no one will
69     		 * ever think of using such a name in real life. This is not
70     		 * fool proof. I don't think there is a total solution to this.
71     		 */
72     		int entry_num;
73     		char *pt = info->fake.fname + info->fake.len;
74     		/* lookup for encoding the last character of the extension 
75     		 * It contains valid character after the ugly one to make sure 
76     		 * even if someone overflows the 32 * 32 * 9 limit, it still 
77     		 * does something 
78     		 */
79     #define SPECIAL_MANGLING '{','}','(',')','!','`','^','&','@'
80     		static char lookup3[] =
81     		{
82     			SPECIAL_MANGLING,
83     		/* This is the start of lookup12 */
84     			'_', '1', '2', '3', '4', '5', '6', '7', '8', '9',
85     			'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
86     			'p', 'q', 'r', 's', 't', 'u', 'v'
87     		};
88     
89     #define lookup12 (lookup3+9)
90     		entry_num = info->f_pos / UMSDOS_REC_SIZE;
91     		if (entry_num > (9* 32 * 32)){
92     			printk (KERN_WARNING "UMSDOS: more than 9216 files in a directory.\n"
93     				"This may break the mangling strategy.\n"
94     				"Not a killer problem. See doc.\n");
95     		}
96     		*pt++ = '.';
97     		*pt++ = lookup3 [(entry_num >> 10) & 31];
98     		*pt++ = lookup12[(entry_num >> 5) & 31];
99     		*pt++ = lookup12[entry_num & 31];
100     		*pt = '\0';		/* help doing printk */	
101     		info->fake.len += 4;
102     		info->msdos_reject = 0;		/* Avoid mangling twice */
103     	}
104     }
105     
106     /*
107      * Evaluate the record size needed to store of name of len character.
108      * The value returned is a multiple of UMSDOS_REC_SIZE.
109      */
110     int umsdos_evalrecsize (int len)
111     {
112     	struct umsdos_dirent dirent;
113     	int nbrec = 1 + ((len - 1 + (dirent.name - (char *) &dirent))
114     			 / UMSDOS_REC_SIZE);
115     
116     	return nbrec * UMSDOS_REC_SIZE;
117     	/*
118     	 * GLU        This should be inlined or something to speed it up to the max.
119     	 * GLU        nbrec is absolutely not needed to return the value.
120     	 */
121     }
122     #ifdef TEST
123     int umsdos_evalrecsize_old (int len)
124     {
125     	struct umsdos_dirent dirent;
126     	int size = len + (dirent.name - (char *) &dirent);
127     	int nbrec = size / UMSDOS_REC_SIZE;
128     	int extra = size % UMSDOS_REC_SIZE;
129     
130     	if (extra > 0)
131     		nbrec++;
132     	return nbrec * UMSDOS_REC_SIZE;
133     }
134     #endif
135     
136     
137     /*
138      * Fill the struct info with the full and msdos name of a file
139      * Return 0 if all is OK, a negative error code otherwise.
140      */
141     int umsdos_parse (
142     			 const char *fname,
143     			 int len,
144     			 struct umsdos_info *info)
145     {
146     	int ret = -ENAMETOOLONG;
147     
148     	/* #Specification: file name / too long
149     	 * If a file name exceed UMSDOS maxima, the file name is silently
150     	 * truncated. This makes it conformant with the other file system
151     	 * of Linux (minix and ext2 at least).
152     	 */
153     	if (len > UMSDOS_MAXNAME)
154     		len = UMSDOS_MAXNAME;
155     	{
156     		const char *firstpt = NULL;	/* First place we saw a "." in fname */
157     
158     		/* #Specification: file name / non MSDOS conforming / base length 0
159     		 * file names beginning with a period '.' are invalid for MS-DOS.
160     		 * It needs absolutely a base name. So the file name is mangled
161     		 */
162     		int ivldchar = fname[0] == '.';		/* At least one invalid character */
163     		int msdos_len = len;
164     		int base_len;
165     
166     		/*
167     		 * cardinal_per_size tells if there exists at least one
168     		 * DOS pseudo device on length n.  See the test below.
169     		 */
170     		static const char cardinal_per_size[9] =
171     		{
172     			0, 0, 0, 1, 1, 0, 1, 0, 1
173     		};
174     
175     		/*
176     		 * lkp translate all character to acceptable character (for DOS).
177     		 * When lkp[n] == n, it means also it is an acceptable one.
178     		 * So it serves both as a flag and as a translator.
179     		 */
180     		static char lkp[256];
181     		static char is_init = 0;
182     
183     		if (!is_init) {
184     			/*
185     			 * Initialisation of the array is easier and less error
186                              * prone like this.
187     			 */
188     			int i;
189     			static const char *spc = "\"*+,/:;<=>?[\\]|~";
190     
191     			is_init = 1;
192     			for (i = 0; i <= 32; i++)
193     				lkp[i] = '#';
194     			for (i = 33; i < 'A'; i++)
195     				lkp[i] = (char) i;
196     			for (i = 'A'; i <= 'Z'; i++)
197     				lkp[i] = (char) (i + ('a' - 'A'));
198     			for (i = 'Z' + 1; i < 127; i++)
199     				lkp[i] = (char) i;
200     			for (i = 128; i < 256; i++)
201     				lkp[i] = '#';
202     
203     			lkp['.'] = '_';
204     			while (*spc != '\0')
205     				lkp[(unsigned char) (*spc++)] = '#';
206     		}
207     		/*  GLU
208     		 * File names longer than 8+'.'+3 are invalid for MS-DOS,
209     		 * so the file name is to be mangled--no further test is needed.
210     		 * This speeds up handling of long names.
211     		 * The position of the last point is no more necessary anyway.
212     		 */
213     		if (len <= (8 + 1 + 3)) {
214     			const char *pt = fname;
215     			const char *endpt = fname + len;
216     
217     			while (pt < endpt) {
218     				if (*pt == '.') {
219     					if (firstpt != NULL) {
220     						/* 2 . in a file name. Reject */
221     						ivldchar = 1;
222     						break;
223     					} else {
224     						int extlen = (int) (endpt - pt);
225     
226     						firstpt = pt;
227     						if (firstpt - fname > 8) {
228     							/* base name longer than 8: reject */
229     							ivldchar = 1;
230     							break;
231     						} else if (extlen > 4) {
232     							/* Extension longer than 4 (including .): reject */
233     							ivldchar = 1;
234     							break;
235     						} else if (extlen == 1) {
236     							/* #Specification: file name / non MSDOS conforming / last char == .
237     							 * If the last character of a file name is
238     							 * a period, mangling is applied. MS-DOS does
239     							 * not support those file names.
240     							 */
241     							ivldchar = 1;
242     							break;
243     						} else if (extlen == 4) {
244     							/* #Specification: file name / non MSDOS conforming / mangling clash
245     							 * To avoid clash with    the umsdos mangling, any file
246     							 * with a special character as the first character
247     							 * of the extension will be mangled. This solves the
248     							 * following problem:
249     							 * 
250     							 * #
251     							 * touch FILE
252     							 * # FILE is invalid for DOS, so mangling is applied
253     							 * # file.{_1 is created in the DOS directory
254     							 * touch file.{_1
255     							 * # To UMSDOS file point to a single DOS entry.
256     							 * # So file.{_1 has to be mangled.
257     							 * #
258     							 */
259     							static char special[] =
260     							{
261     								SPECIAL_MANGLING, '\0'
262     							};
263     
264     							if (strchr (special, firstpt[1]) != NULL) {
265     								ivldchar = 1;
266     								break;
267     							}
268     						}
269     					}
270     				} else if (lkp[(unsigned char) (*pt)] != *pt) {
271     					ivldchar = 1;
272     					break;
273     				}
274     				pt++;
275     			}
276     		} else {
277     			ivldchar = 1;
278     		}
279     		if (ivldchar
280     		    || (firstpt == NULL && len > 8)
281     		    || (len == UMSDOS_EMD_NAMELEN
282     			&& memcmp (fname, UMSDOS_EMD_FILE, UMSDOS_EMD_NAMELEN) == 0)) {
283     			/* #Specification: file name / --linux-.---
284     			 * The name of the EMD file --linux-.--- is map to a mangled
285     			 * name. So UMSDOS does not restrict its use.
286     			 */
287     			/* #Specification: file name / non MSDOS conforming / mangling
288     			 * Non MSDOS conforming file names must use some alias to fit
289     			 * in the MSDOS name space.
290     			 * 
291     			 * The strategy is simple. The name is simply truncated to
292     			 * 8 char. points are replace with underscore and a
293     			 * number is given as an extension. This number correspond
294     			 * to the entry number in the EMD file. The EMD file
295     			 * only need to carry the real name.
296     			 * 
297     			 * Upper case is also converted to lower case.
298     			 * Control character are converted to #.
299     			 * Spaces are converted to #.
300     			 * The following characters are also converted to #.
301     			 * #
302     			 * " * + , / : ; < = > ? [ \ ] | ~
303     			 * #
304     			 * 
305     			 * Sometimes the problem is not in MS-DOS itself but in
306     			 * command.com.
307     			 */
308     			int i;
309     			char *pt = info->fake.fname;
310     
311     			base_len = msdos_len = (msdos_len > 8) ? 8 : msdos_len;
312     			/*
313     			 * There is no '.' any more so we know for a fact that
314     			 * the base length is the length.
315     			 */
316     			memcpy (info->fake.fname, fname, msdos_len);
317     			for (i = 0; i < msdos_len; i++, pt++)
318     				*pt = lkp[(unsigned char) (*pt)];
319     			*pt = '\0';	/* GLU  We force null termination. */
320     			info->msdos_reject = 1;
321     			/*
322     			 * The numeric extension is added only when we know
323     			 * the position in the EMD file, in umsdos_newentry(),
324     			 * umsdos_delentry(), and umsdos_findentry().
325     			 * See umsdos_manglename().
326     			 */
327     		} else {
328     			/* Conforming MSDOS file name */
329     			strncpy (info->fake.fname, fname, len);
330     			info->msdos_reject = 0;
331     			base_len = firstpt != NULL ? (int) (firstpt - fname) : len;
332     		}
333     		if (cardinal_per_size[base_len]) {
334     			/* #Specification: file name / MSDOS devices / mangling
335     			 * To avoid unreachable file from MS-DOS, any MS-DOS conforming
336     			 * file with a basename equal to one of the MS-DOS pseudo
337     			 * devices will be mangled.
338     			 * 
339     			 * If a file such as "prn" was created, it would be unreachable
340     			 * under MS-DOS because "prn" is assumed to be the printer, even
341     			 * if the file does have an extension.
342     			 * 
343     			 * Since the extension is unimportant to MS-DOS, we must patch
344     			 * the basename also. We simply insert a minus '-'. To avoid
345     			 * conflict with valid file with a minus in front (such as
346     			 * "-prn"), we add an mangled extension like any other
347     			 * mangled file name.
348     			 * 
349     			 * Here is the list of DOS pseudo devices:
350     			 * 
351     			 * #
352     			 * "prn","con","aux","nul",
353     			 * "lpt1","lpt2","lpt3","lpt4",
354     			 * "com1","com2","com3","com4",
355     			 * "clock$"
356     			 * #
357     			 * 
358     			 * and some standard ones for common DOS programs
359     			 * 
360     			 * "emmxxxx0","xmsxxxx0","setverxx"
361     			 * 
362     			 * (Thanks to Chris Hall <cah17@phoenix.cambridge.ac.uk>
363     			 * for pointing these out to me).
364     			 * 
365     			 * Is there one missing?
366     			 */
367     			/* This table must be ordered by length */
368     			static const char *tbdev[] =
369     			{
370     				"prn", "con", "aux", "nul",
371     				"lpt1", "lpt2", "lpt3", "lpt4",
372     				"com1", "com2", "com3", "com4",
373     				"clock$",
374     				"emmxxxx0", "xmsxxxx0", "setverxx"
375     			};
376     
377     			/* Tell where to find in tbdev[], the first name of */
378     			/* a certain length */
379     			static const char start_ind_dev[9] =
380     			{
381     				0, 0, 0, 4, 12, 12, 13, 13, 16
382     			};
383     			char basen[9];
384     			int i;
385     
386     			for (i = start_ind_dev[base_len - 1]; i < start_ind_dev[base_len]; i++) {
387     				if (memcmp (info->fake.fname, tbdev[i], base_len) == 0) {
388     					memcpy (basen, info->fake.fname, base_len);
389     					basen[base_len] = '\0';		/* GLU  We force null termination. */
390     					/*
391     					 * GLU        We do that only if necessary; we try to do the
392     					 * GLU        simple thing in the usual circumstance. 
393     					 */
394     					info->fake.fname[0] = '-';
395     					strcpy (info->fake.fname + 1, basen);	/* GLU  We already guaranteed a null would be at the end. */
396     					msdos_len = (base_len == 8) ? 8 : base_len + 1;
397     					info->msdos_reject = 1;
398     					break;
399     				}
400     			}
401     		}
402     		info->fake.fname[msdos_len] = '\0';	/* Help doing printk */
403     		/* GLU      This zero should (always?) be there already. */
404     		info->fake.len = msdos_len;
405     		/* Why not use info->fake.len everywhere? Is it longer?
406                      */
407     		memcpy (info->entry.name, fname, len);
408     		info->entry.name[len] = '\0';	/* for printk */
409     		info->entry.name_len = len;
410     		ret = 0;
411     	}
412     	/*
413     	 * Evaluate how many records are needed to store this entry.
414     	 */
415     	info->recsize = umsdos_evalrecsize (len);
416     	return ret;
417     }
418     
419     #ifdef TEST
420     
/* One test case for umsdos_parse(), used by the TEST program only */
struct MANG_TEST {
	char *fname;		/* Name to validate */
	int msdos_reject;	/* Expected msdos_reject flag */
	char *msname;		/* Expected msdos name */
};

/*
 * Table of test cases: { name given, expected msdos_reject, expected
 * fake name before the mangled extension is appended }.
 */
struct MANG_TEST tb[] =
{
	{ "hello", 0, "hello" },
	{ "hello.1", 0, "hello.1" },
	{ "hello.1_", 0, "hello.1_" },
	{ "prm", 0, "prm" },

#ifdef PROPOSITION
	{ "HELLO", 1, "hello" },
	{ "Hello.1", 1, "hello.1" },
	{ "Hello.c", 1, "hello.c" },
#else
/*
 * I find the three examples below very unfortunate.  I propose to
 * convert them to lower case in a quick preliminary pass, then test
 * whether there are other troublesome characters.  I have not made
 * this change, because it is not easy, but I wanted to mention the 
 * principle.  Obviously something like that would increase the chance
 * of collisions, for example between "HELLO" and "Hello", but these
 * can be treated elsewhere along with the other collisions.
 */

	{ "HELLO", 1, "hello" },
	{ "Hello.1", 1, "hello_1" },
	{ "Hello.c", 1, "hello_c" },
#endif

	{ "hello.{_1", 1, "hello_{_" },
	{ "hello\t", 1, "hello#" },
	{ "hello.1.1", 1, "hello_1_" },
	{ "hel,lo", 1, "hel#lo" },
	{ "Salut.Tu.vas.bien?", 1, "salut_tu" },
	{ ".profile", 1, "_profile" },
	{ ".xv", 1, "_xv" },
	{ "toto.", 1, "toto_" },
	{ "clock$.x", 1, "-clock$" },
	{ "emmxxxx0", 1, "-emmxxxx" },
	{ "emmxxxx0.abcd", 1, "-emmxxxx" },
	{ "aux", 1, "-aux" },
	{ "prn", 1, "-prn" },
	{ "prn.abc", 1, "-prn" },
	{ "PRN", 1, "-prn" },
  /* 
   * GLU        WARNING:  the results of these are different with my version
   * GLU        of mangling compared to the original one.
   * GLU        CAUSE:  the manner of calculating the baselen variable.
   * GLU                For you they are always 3.
   * GLU                For me they are respectively 7, 8, and 8.
   */
	{ "PRN.abc", 1, "prn_abc" },
	{ "Prn.abcd", 1, "prn_abcd" },
	{ "prn.abcd", 1, "prn_abcd" },
	{ "Prn.abcdefghij", 1, "prn_abcd" }
};
482     
483     int main (int argc, char *argv[])
484     {
485     	int i, rold, rnew;
486     
487     	printf ("Testing the umsdos_parse.\n");
488     	for (i = 0; i < sizeof (tb) / sizeof (tb[0]); i++) {
489     		struct MANG_TEST *pttb = tb + i;
490     		struct umsdos_info info;
491     		int ok = umsdos_parse (pttb->fname, strlen (pttb->fname), &info);
492     
493     		if (strcmp (info.fake.fname, pttb->msname) != 0) {
494     			printf ("**** %s -> ", pttb->fname);
495     			printf ("%s <> %s\n", info.fake.fname, pttb->msname);
496     		} else if (info.msdos_reject != pttb->msdos_reject) {
497     			printf ("**** %s -> %s ", pttb->fname, pttb->msname);
498     			printf ("%d <> %d\n", info.msdos_reject, pttb->msdos_reject);
499     		} else {
500     			printf ("     %s -> %s %d\n", pttb->fname, pttb->msname
501     				,pttb->msdos_reject);
502     		}
503     	}
504     	printf ("Testing the new umsdos_evalrecsize.");
505     	for (i = 0; i < UMSDOS_MAXNAME; i++) {
506     		rnew = umsdos_evalrecsize (i);
507     		rold = umsdos_evalrecsize_old (i);
508     		if (!(i % UMSDOS_REC_SIZE)) {
509     			printf ("\n%d:\t", i);
510     		}
511     		if (rnew != rold) {
512     			printf ("**** %d newres: %d != %d \n", i, rnew, rold);
513     		} else {
514     			printf (".");
515     		}
516     	}
517     	printf ("\nEnd of Testing.\n");
518     
519     	return 0;
520     }
521     
522     #endif
523