File: /usr/src/linux/fs/hfs/trans.c

1     /*
2      * linux/fs/hfs/trans.c
3      *
4      * Copyright (C) 1995-1997  Paul H. Hargrove
5      * This file may be distributed under the terms of the GNU General Public License.
6      *
7      * This file contains routines for converting between the Macintosh
8      * character set and various other encodings.  This includes dealing
9      * with ':' vs. '/' as the path-element separator.
10      *
11      * Latin-1 translation based on code contributed by Holger Schemel
12      * (aeglos@valinor.owl.de).
13      *
14      * The '8-bit', '7-bit ASCII' and '7-bit alphanumeric' encodings are
15      * implementations of the three encodings recommended by Apple in the
16      * document "AppleSingle/AppleDouble Formats: Developer's Note
17      * (9/94)".  This document is available from Apple's Technical
18      * Information Library from the World Wide Web server
19      * www.info.apple.com.
20      *
21      * The 'CAP' encoding is an implementation of the naming scheme used
22      * by the Columbia AppleTalk Package, available for anonymous FTP from
23      * ????.
24      *
25      * "XXX" in a comment is a note to myself to consider changing something.
26      *
27      * In function preconditions the term "valid" applied to a pointer to
28      * a structure means that the pointer is non-NULL and the structure it
29      * points to has all fields initialized to consistent values.
30      */
31     
32     #include "hfs.h"
33     #include <linux/hfs_fs_sb.h>
34     #include <linux/hfs_fs_i.h>
35     #include <linux/hfs_fs.h>
36     
37     /*================ File-local variables ================*/
38     
/*
 * Lookup table mapping a 4-bit value (0-15) to its lowercase ASCII
 * hexadecimal digit.  The table is only ever read, so it is const.
 */
static const char hex[16] = {
	'0', '1', '2', '3', '4', '5', '6', '7',
	'8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
};
/*
 * Latin-1 to Mac character set map for the upper 128 codes.
 *
 * Entry x-128 holds the Mac encoding of Latin-1 character x, or zero
 * when no translation is recorded.  The table is not maintained by
 * hand: hfs_latin2mac() fills it in by inverting mac2latin_map[] the
 * first time it is called, so there is only one map to keep up to date.
 */
static unsigned char latin2mac_map[128] = { 0 };
50     
/*
 * Mac to Latin-1 map for the upper 128 characters (both character sets
 * have ASCII in the lower 128 positions).
 *
 * Entry x-128 holds the Latin-1 encoding of the character whose
 * Macintosh encoding is x.  A zero entry means no Latin-1 equivalent
 * is recorded.  The table is read-only, hence const.
 */
static const unsigned char mac2latin_map[128] = {
	/* Mac 0x80-0x87 */ 0xC4, 0xC5, 0xC7, 0xC9, 0xD1, 0xD6, 0xDC, 0xE1,
	/* Mac 0x88-0x8F */ 0xE0, 0xE2, 0xE4, 0xE3, 0xE5, 0xE7, 0xE9, 0xE8,
	/* Mac 0x90-0x97 */ 0xEA, 0xEB, 0xED, 0xEC, 0xEE, 0xEF, 0xF1, 0xF3,
	/* Mac 0x98-0x9F */ 0xF2, 0xF4, 0xF6, 0xF5, 0xFA, 0xF9, 0xFB, 0xFC,
	/* Mac 0xA0-0xA7 */ 0x00, 0xB0, 0xA2, 0xA3, 0xA7, 0xB7, 0xB6, 0xDF,
	/* Mac 0xA8-0xAF */ 0xAE, 0xA9, 0x00, 0xB4, 0xA8, 0x00, 0xC6, 0xD8,
	/* Mac 0xB0-0xB7 */ 0x00, 0xB1, 0x00, 0x00, 0xA5, 0xB5, 0xF0, 0x00,
	/* Mac 0xB8-0xBF */ 0x00, 0x00, 0x00, 0xAA, 0xBA, 0x00, 0xE6, 0xF8,
	/* Mac 0xC0-0xC7 */ 0xBF, 0xA1, 0xAC, 0x00, 0x00, 0x00, 0x00, 0xAB,
	/* Mac 0xC8-0xCF */ 0xBB, 0x00, 0xA0, 0xC0, 0xC3, 0xD5, 0x00, 0x00,
	/* Mac 0xD0-0xD7 */ 0xAD, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF7, 0x00,
	/* Mac 0xD8-0xDF */ 0xFF, 0x00, 0x00, 0xA4, 0x00, 0x00, 0x00, 0x00,
	/* Mac 0xE0-0xE7 */ 0x00, 0x00, 0xB8, 0x00, 0x00, 0xC2, 0xCA, 0xC1,
	/* Mac 0xE8-0xEF */ 0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0xD3, 0xD4,
	/* Mac 0xF0-0xF7 */ 0x00, 0xD2, 0xDA, 0xDB, 0xD9, 0x00, 0x00, 0x00,
	/* Mac 0xF8-0xFF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
73     
74     /*================ File-local functions ================*/
75     
/*
 * dehex()
 *
 * Convert one ASCII hexadecimal digit to its numeric value.
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F' and returns the corresponding
 * value in the range 0-15.  Any other character yields 0xff, so a
 * caller can detect an invalid digit by testing the upper nibble of
 * the result.
 *
 * The return type carries no 'const': a qualifier on a value returned
 * by copy has no effect and only provokes compiler warnings.
 */
static inline unsigned char dehex(char c)
{
	if ((c >= '0') && (c <= '9')) {
		return c - '0';
	}
	if ((c >= 'a') && (c <= 'f')) {
		return c - 'a' + 10;
	}
	if ((c >= 'A') && (c <= 'F')) {
		return c - 'A' + 10;
	}
	return 0xff;
}
93     
94     /*================ Global functions ================*/
95     
96     /*
97      * hfs_mac2nat()
98      *
99      * Given a 'Pascal String' (a string preceded by a length byte) in
100      * the Macintosh character set produce the corresponding filename using
101      * the Netatalk name-mangling scheme, returning the length of the
102      * mangled filename.  Note that the output string is not NULL terminated.
103      *
104      * The name-mangling works as follows:
105      * Characters 32-126 (' '-'~') except '/' and any initial '.' are passed
106      * unchanged from input to output.  The remaining characters are replaced
107      * by three characters: ':xx' where xx is the hexadecimal representation
108      * of the character, using lowercase 'a' through 'f'.
109      */
110     int hfs_mac2nat(char *out, const struct hfs_name *in) {
111     	unsigned char c;
112     	const unsigned char *p = in->Name;
113     	int len = in->Len;
114     	int count = 0;
115     
116     	/* Special case for .AppleDesktop which in the
117     	   distant future may be a pseudodirectory. */
118     	if (strncmp(".AppleDesktop", p, len) == 0) {
119     		strncpy(out, p, 13);
120     		return 13;
121     	}
122     
123     	while (len--) {
124     		c = *p++;
125     		if ((c<32) || (c=='/') || (c>126) || (!count && (c=='.'))) {
126     			*out++ = ':';
127     			*out++ = hex[(c>>4) & 0xf];
128     			*out++ = hex[c & 0xf];
129     			count += 3;
130     		} else {
131     			*out++ = c;
132     			count++;
133     		}
134     	}
135     	return count;
136     }
137     
138     /*
139      * hfs_mac2cap()
140      *
141      * Given a 'Pascal String' (a string preceded by a length byte) in
142      * the Macintosh character set produce the corresponding filename using
143      * the CAP name-mangling scheme, returning the length of the mangled
144      * filename.  Note that the output string is not NULL terminated.
145      *
146      * The name-mangling works as follows:
147      * Characters 32-126 (' '-'~') except '/' are passed unchanged from
148      * input to output.  The remaining characters are replaced by three
149      * characters: ':xx' where xx is the hexadecimal representation of the
150      * character, using lowercase 'a' through 'f'.
151      */
152     int hfs_mac2cap(char *out, const struct hfs_name *in) {
153     	unsigned char c;
154     	const unsigned char *p = in->Name;
155     	int len = in->Len;
156     	int count = 0;
157     
158     	while (len--) {
159     		c = *p++;
160     		if ((c<32) || (c=='/') || (c>126)) {
161     			*out++ = ':';
162     			*out++ = hex[(c>>4) & 0xf];
163     			*out++ = hex[c & 0xf];
164     			count += 3;
165     		} else {
166     			*out++ = c;
167     			count++;
168     		}
169     	}
170     	return count;
171     }
172     
173     /*
174      * hfs_mac2eight()
175      *
176      * Given a 'Pascal String' (a string preceded by a length byte) in
177      * the Macintosh character set produce the corresponding filename using
178      * the '8-bit' name-mangling scheme, returning the length of the
179      * mangled filename.  Note that the output string is not NULL
180      * terminated.
181      *
182      * This is one of the three recommended naming conventions described
183      * in Apple's document "AppleSingle/AppleDouble Formats: Developer's
184      * Note (9/94)"
185      *
186      * The name-mangling works as follows:
187      * Characters 0, '%' and '/' are replaced by three characters: '%xx'
188      * where xx is the hexadecimal representation of the character, using
189      * lowercase 'a' through 'f'.  All other characters are passed
190      * unchanged from input to output.  Note that this format is mainly
191      * implemented for completeness and is rather hard to read.
192      */
193     int hfs_mac2eight(char *out, const struct hfs_name *in) {
194     	unsigned char c;
195     	const unsigned char *p = in->Name;
196     	int len = in->Len;
197     	int count = 0;
198     
199     	while (len--) {
200     		c = *p++;
201     		if (!c || (c=='/') || (c=='%')) {
202     			*out++ = '%';
203     			*out++ = hex[(c>>4) & 0xf];
204     			*out++ = hex[c & 0xf];
205     			count += 3;
206     		} else {
207     			*out++ = c;
208     			count++;
209     		}
210     	}
211     	return count;
212     }
213     
214     /*
215      * hfs_mac2seven()
216      *
217      * Given a 'Pascal String' (a string preceded by a length byte) in
218      * the Macintosh character set produce the corresponding filename using
219      * the '7-bit ASCII' name-mangling scheme, returning the length of the
220      * mangled filename.  Note that the output string is not NULL
221      * terminated.
222      *
223      * This is one of the three recommended naming conventions described
224      * in Apple's document "AppleSingle/AppleDouble Formats: Developer's
225      * Note (9/94)"
226      *
227      * The name-mangling works as follows:
228      * Characters 0, '%', '/' and 128-255 are replaced by three
229      * characters: '%xx' where xx is the hexadecimal representation of the
230      * character, using lowercase 'a' through 'f'.	All other characters
231      * are passed unchanged from input to output.  Note that control
232      * characters (including newline) and space are unchanged make reading
233      * these filenames difficult.
234      */
235     int hfs_mac2seven(char *out, const struct hfs_name *in) {
236     	unsigned char c;
237     	const unsigned char *p = in->Name;
238     	int len = in->Len;
239     	int count = 0;
240     
241     	while (len--) {
242     		c = *p++;
243     		if (!c || (c=='/') || (c=='%') || (c&0x80)) {
244     			*out++ = '%';
245     			*out++ = hex[(c>>4) & 0xf];
246     			*out++ = hex[c & 0xf];
247     			count += 3;
248     		} else {
249     			*out++ = c;
250     			count++;
251     		}
252     	}
253     	return count;
254     }
255     
256     /*
257      * hfs_mac2alpha()
258      *
259      * Given a 'Pascal String' (a string preceded by a length byte) in
260      * the Macintosh character set produce the corresponding filename using
261      * the '7-bit alphanumeric' name-mangling scheme, returning the length
262      * of the mangled filename.  Note that the output string is not NULL
263      * terminated.
264      *
265      * This is one of the three recommended naming conventions described
266      * in Apple's document "AppleSingle/AppleDouble Formats: Developer's
267      * Note (9/94)"
268      *
269      * The name-mangling works as follows:
270      * The characters 'a'-'z', 'A'-'Z', '0'-'9', '_' and the last '.' in
271      * the filename are passed unchanged from input to output.  All
272      * remaining characters (including any '.'s other than the last) are
273      * replaced by three characters: '%xx' where xx is the hexadecimal
274      * representation of the character, using lowercase 'a' through 'f'.
275      */
276     int hfs_mac2alpha(char *out, const struct hfs_name *in) {
277     	unsigned char c;
278     	const unsigned char *p = in->Name;
279     	int len = in->Len;
280     	int count = 0;
281     	const unsigned char *lp;	/* last period */
282     
283     	/* strrchr() would be good here, but 'in' is not null-terminated */
284     	for (lp=p+len-1; (lp>=p)&&(*lp!='.'); --lp) {}
285     	++lp;
286     
287     	while (len--) {
288     		c = *p++;
289     		if ((p==lp) || ((c>='0')&&(c<='9')) || ((c>='A')&&(c<='Z')) ||
290     				((c>='a')&&(c<='z')) || (c=='_')) {
291     			*out++ = c;
292     			count++;
293     		} else {
294     			*out++ = '%';
295     			*out++ = hex[(c>>4) & 0xf];
296     			*out++ = hex[c & 0xf];
297     			count += 3;
298     		}
299     	}
300     	return count;
301     }
302     
303     /*
304      * hfs_mac2triv()
305      *
306      * Given a 'Pascal String' (a string preceded by a length byte) in
307      * the Macintosh character set produce the corresponding filename using
308      * the 'trivial' name-mangling scheme, returning the length of the
309      * mangled filename.  Note that the output string is not NULL
310      * terminated.
311      *
312      * The name-mangling works as follows:
313      * The character '/', which is illegal in Linux filenames is replaced
314      * by ':' which never appears in HFS filenames.	 All other characters
315      * are passed unchanged from input to output.
316      */
317     int hfs_mac2triv(char *out, const struct hfs_name *in) {
318     	unsigned char c;
319     	const unsigned char *p = in->Name;
320     	int len = in->Len;
321     	int count = 0;
322     
323     	while (len--) {
324     		c = *p++;
325     		if (c=='/') {
326     			*out++ = ':';
327     		} else {
328     			*out++ = c;
329     		}
330     		count++;
331     	}
332     	return count;
333     }
334     
335     /*
336      * hfs_mac2latin()
337      *
338      * Given a 'Pascal String' (a string preceded by a length byte) in
339      * the Macintosh character set produce the corresponding filename using
340      * the 'Latin-1' name-mangling scheme, returning the length of the
341      * mangled filename.  Note that the output string is not NULL
342      * terminated.
343      *
344      * The Macintosh character set and Latin-1 are both extensions of the
345      * ASCII character set.	 Some, but certainly not all, of the characters
346      * in the Macintosh character set are also in Latin-1 but not with the
347      * same encoding.  This name-mangling scheme replaces the characters in
348      * the Macintosh character set that have Latin-1 equivalents by those
349      * equivalents; the characters 32-126, excluding '/' and '%', are
350      * passed unchanged from input to output.  The remaining characters
351      * are replaced by three characters: '%xx' where xx is the hexadecimal
352      * representation of the character, using lowercase 'a' through 'f'.
353      *
354      * The array mac2latin_map[] indicates the correspondence between the
355      * two character sets.	The byte in element x-128 gives the Latin-1
356      * encoding of the character with encoding x in the Macintosh
357      * character set.  A value of zero indicates Latin-1 has no
358      * corresponding character.
359      */
360     int hfs_mac2latin(char *out, const struct hfs_name *in) {
361     	unsigned char c;
362     	const unsigned char *p = in->Name;
363     	int len = in->Len;
364     	int count = 0;
365     
366     	while (len--) {
367     		c = *p++;
368     
369     		if ((c & 0x80) && mac2latin_map[c & 0x7f]) {
370     			*out++ = mac2latin_map[c & 0x7f];
371     			count++;
372     		} else if ((c>=32) && (c<=126) && (c!='/') && (c!='%')) {
373     			*out++ =  c;
374     			count++;
375     		} else {
376     			*out++ = '%';
377     			*out++ = hex[(c>>4) & 0xf];
378     			*out++ = hex[c & 0xf];
379     			count += 3;
380     		}
381     	}
382     	return count;
383     }
384     
385     /*
386      * hfs_colon2mac()
387      *
388      * Given an ASCII string (not null-terminated) and its length,
389      * generate the corresponding filename in the Macintosh character set
390      * using the 'CAP' name-mangling scheme, returning the length of the
391      * mangled filename.  Note that the output string is not NULL
392      * terminated.
393      *
394      * This routine is a inverse to hfs_mac2cap() and hfs_mac2nat().
395      * A ':' not followed by a 2-digit hexadecimal number (or followed
396      * by the codes for NULL or ':') is replaced by a '|'.
397      */
398     void hfs_colon2mac(struct hfs_name *out, const char *in, int len) {
399     	int hi, lo;
400     	unsigned char code, c, *count;
401     	unsigned char *p = out->Name;
402     
403     	out->Len = 0;
404     	count = &out->Len;
405     	while (len-- && (*count < HFS_NAMELEN)) {
406     		c = *in++;
407     		(*count)++;
408     		if (c!=':') {
409     			*p++ = c;
410     		} else if ((len<2) ||
411     			   ((hi=dehex(in[0])) & 0xf0) ||
412     			   ((lo=dehex(in[1])) & 0xf0) ||
413     			   !(code = (hi << 4) | lo) ||
414     			   (code == ':')) {
415     			*p++ = '|';
416     		} else {
417     			*p++ = code;
418     			len -= 2;
419     			in += 2;
420     		}
421     	}
422     }
423     
424     /*
425      * hfs_prcnt2mac()
426      *
427      * Given an ASCII string (not null-terminated) and its length,
428      * generate the corresponding filename in the Macintosh character set
429      * using Apple's three recommended name-mangling schemes, returning
430      * the length of the mangled filename.	Note that the output string is
431      * not NULL terminated.
432      *
433      * This routine is a inverse to hfs_mac2alpha(), hfs_mac2seven() and
434      * hfs_mac2eight().
435      * A '%' not followed by a 2-digit hexadecimal number (or followed
436      * by the code for NULL or ':') is unchanged.
437      * A ':' is replaced by a '|'.
438      */
439     void hfs_prcnt2mac(struct hfs_name *out, const char *in, int len) {
440     	int hi, lo;
441     	unsigned char code, c, *count;
442     	unsigned char *p = out->Name;
443     
444     	out->Len = 0;
445     	count = &out->Len;
446     	while (len-- && (*count < HFS_NAMELEN)) {
447     		c = *in++;
448     		(*count)++;
449     		if (c==':') {
450     			*p++ = '|';
451     		} else if (c!='%') {
452     			*p++ = c;
453     		} else if ((len<2) ||
454     			   ((hi=dehex(in[0])) & 0xf0) ||
455     			   ((lo=dehex(in[1])) & 0xf0) ||
456     			   !(code = (hi << 4) | lo) ||
457     			   (code == ':')) {
458     			*p++ = '%';
459     		} else {
460     			*p++ = code;
461     			len -= 2;
462     			in += 2;
463     		}
464     	}
465     }
466     
467     /*
468      * hfs_triv2mac()
469      *
470      * Given an ASCII string (not null-terminated) and its length,
471      * generate the corresponding filename in the Macintosh character set
472      * using the 'trivial' name-mangling scheme, returning the length of
473      * the mangled filename.  Note that the output string is not NULL
474      * terminated.
475      *
476      * This routine is a inverse to hfs_mac2triv().
477      * A ':' is replaced by a '/'.
478      */
479     void hfs_triv2mac(struct hfs_name *out, const char *in, int len) {
480     	unsigned char c, *count;
481     	unsigned char *p = out->Name;
482     
483     	out->Len = 0;
484     	count = &out->Len;
485     	while (len-- && (*count < HFS_NAMELEN)) {
486     		c = *in++;
487     		(*count)++;
488     		if (c==':') {
489     			*p++ = '/';
490     		} else {
491     			*p++ = c;
492     		}
493     	}
494     }
495     
496     /*
497      * hfs_latin2mac()
498      *
499      * Given an Latin-1 string (not null-terminated) and its length,
500      * generate the corresponding filename in the Macintosh character set
501      * using the 'Latin-1' name-mangling scheme, returning the length of
502      * the mangled filename.  Note that the output string is not NULL
503      * terminated.
504      *
505      * This routine is a inverse to hfs_latin2cap().
506      * A '%' not followed by a 2-digit hexadecimal number (or followed
507      * by the code for NULL or ':') is unchanged.
508      * A ':' is replaced by a '|'.
509      *
510      * Note that the character map is built the first time it is needed.
511      */
512     void hfs_latin2mac(struct hfs_name *out, const char *in, int len)
513     {
514     	int hi, lo;
515     	unsigned char code, c, *count;
516     	unsigned char *p = out->Name;
517     	static int map_initialized;
518     
519     	if (!map_initialized) {
520     		int i;
521     
522     		/* build the inverse mapping at run time */
523     		for (i = 0; i < 128; i++) {
524     			if ((c = mac2latin_map[i])) {
525     				latin2mac_map[(int)c - 128] = i + 128;
526     			}
527     		}
528     		map_initialized = 1;
529     	}
530     
531     	out->Len = 0;
532     	count = &out->Len;
533     	while (len-- && (*count < HFS_NAMELEN)) {
534     		c = *in++;
535     		(*count)++;
536     
537     		if (c==':') {
538     			*p++ = '|';
539     		} else if (c!='%') {
540     			if (c<128 || !(*p = latin2mac_map[c-128])) {
541     				*p = c;
542     			}
543     			p++;
544     		} else if ((len<2) ||
545     			   ((hi=dehex(in[0])) & 0xf0) ||
546     			   ((lo=dehex(in[1])) & 0xf0) ||
547     			   !(code = (hi << 4) | lo) ||
548     			   (code == ':')) {
549     			*p++ = '%';
550     		} else {
551     			*p++ = code;
552     			len -= 2;
553     			in += 2;
554     		}
555     	}
556     }
557