File: /usr/src/linux/drivers/char/mem.c

1     /*
2      *  linux/drivers/char/mem.c
3      *
4      *  Copyright (C) 1991, 1992  Linus Torvalds
5      *
6      *  Added devfs support. 
7      *    Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu>
8      *  Shared /dev/zero mmaping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
9      */
10     
11     #include <linux/config.h>
12     #include <linux/mm.h>
13     #include <linux/miscdevice.h>
14     #include <linux/tpqic02.h>
15     #include <linux/ftape.h>
16     #include <linux/slab.h>
17     #include <linux/vmalloc.h>
18     #include <linux/mman.h>
19     #include <linux/random.h>
20     #include <linux/init.h>
21     #include <linux/raw.h>
22     #include <linux/tty.h>
23     #include <linux/capability.h>
24     
25     #include <asm/uaccess.h>
26     #include <asm/io.h>
27     #include <asm/pgalloc.h>
28     
/*
 * Prototypes for initialisation routines defined outside this file.
 * Each is declared only when the matching configuration option is set;
 * chr_dev_init() invokes them under the same conditions.
 */
#if defined(CONFIG_I2C)
extern int i2c_init_all(void);
#endif

#if defined(CONFIG_FB)
extern void fbmem_init(void);
#endif

#if defined(CONFIG_PROM_CONSOLE)
extern void prom_con_init(void);
#endif

#if defined(CONFIG_MDA_CONSOLE)
extern void mda_console_init(void);
#endif

#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_CHAR)
extern void tapechar_init(void);
#endif
44          
45     static ssize_t do_write_mem(struct file * file, void *p, unsigned long realp,
46     			    const char * buf, size_t count, loff_t *ppos)
47     {
48     	ssize_t written;
49     
50     	written = 0;
51     #if defined(__sparc__) || defined(__mc68000__)
52     	/* we don't have page 0 mapped on sparc and m68k.. */
53     	if (realp < PAGE_SIZE) {
54     		unsigned long sz = PAGE_SIZE-realp;
55     		if (sz > count) sz = count; 
56     		/* Hmm. Do something? */
57     		buf+=sz;
58     		p+=sz;
59     		count-=sz;
60     		written+=sz;
61     	}
62     #endif
63     	if (copy_from_user(p, buf, count))
64     		return -EFAULT;
65     	written += count;
66     	*ppos += written;
67     	return written;
68     }
69     
70     
71     /*
72      * This funcion reads the *physical* memory. The f_pos points directly to the 
73      * memory location. 
74      */
75     static ssize_t read_mem(struct file * file, char * buf,
76     			size_t count, loff_t *ppos)
77     {
78     	unsigned long p = *ppos;
79     	unsigned long end_mem;
80     	ssize_t read;
81     	
82     	end_mem = __pa(high_memory);
83     	if (p >= end_mem)
84     		return 0;
85     	if (count > end_mem - p)
86     		count = end_mem - p;
87     	read = 0;
88     #if defined(__sparc__) || defined(__mc68000__)
89     	/* we don't have page 0 mapped on sparc and m68k.. */
90     	if (p < PAGE_SIZE) {
91     		unsigned long sz = PAGE_SIZE-p;
92     		if (sz > count) 
93     			sz = count; 
94     		if (sz > 0) {
95     			if (clear_user(buf, sz))
96     				return -EFAULT;
97     			buf += sz; 
98     			p += sz; 
99     			count -= sz; 
100     			read += sz; 
101     		}
102     	}
103     #endif
104     	if (copy_to_user(buf, __va(p), count))
105     		return -EFAULT;
106     	read += count;
107     	*ppos += read;
108     	return read;
109     }
110     
111     static ssize_t write_mem(struct file * file, const char * buf, 
112     			 size_t count, loff_t *ppos)
113     {
114     	unsigned long p = *ppos;
115     	unsigned long end_mem;
116     
117     	end_mem = __pa(high_memory);
118     	if (p >= end_mem)
119     		return 0;
120     	if (count > end_mem - p)
121     		count = end_mem - p;
122     	return do_write_mem(file, __va(p), p, buf, count, ppos);
123     }
124     
125     #ifndef pgprot_noncached
126     
127     /*
128      * This should probably be per-architecture in <asm/pgtable.h>
129      */
130     static inline pgprot_t pgprot_noncached(pgprot_t _prot)
131     {
132     	unsigned long prot = pgprot_val(_prot);
133     
134     #if defined(__i386__) || defined(__x86_64__)
135     	/* On PPro and successors, PCD alone doesn't always mean 
136     	    uncached because of interactions with the MTRRs. PCD | PWT
137     	    means definitely uncached. */ 
138     	if (boot_cpu_data.x86 > 3)
139     		prot |= _PAGE_PCD | _PAGE_PWT;
140     #elif defined(__powerpc__)
141     	prot |= _PAGE_NO_CACHE | _PAGE_GUARDED;
142     #elif defined(__mc68000__)
143     #ifdef SUN3_PAGE_NOCACHE
144     	if (MMU_IS_SUN3)
145     		prot |= SUN3_PAGE_NOCACHE;
146     	else
147     #endif
148     	if (MMU_IS_851 || MMU_IS_030)
149     		prot |= _PAGE_NOCACHE030;
150     	/* Use no-cache mode, serialized */
151     	else if (MMU_IS_040 || MMU_IS_060)
152     		prot = (prot & _CACHEMASK040) | _PAGE_NOCACHE_S;
153     #endif
154     
155     	return __pgprot(prot);
156     }
157     
158     #endif /* !pgprot_noncached */
159     
160     /*
161      * Architectures vary in how they handle caching for addresses 
162      * outside of main memory.
163      */
164     static inline int noncached_address(unsigned long addr)
165     {
166     #if defined(__i386__)
167     	/* 
168     	 * On the PPro and successors, the MTRRs are used to set
169     	 * memory types for physical addresses outside main memory, 
170     	 * so blindly setting PCD or PWT on those pages is wrong.
171     	 * For Pentiums and earlier, the surround logic should disable 
172     	 * caching for the high addresses through the KEN pin, but
173     	 * we maintain the tradition of paranoia in this code.
174     	 */
175      	return !( test_bit(X86_FEATURE_MTRR, &boot_cpu_data.x86_capability) ||
176     		  test_bit(X86_FEATURE_K6_MTRR, &boot_cpu_data.x86_capability) ||
177     		  test_bit(X86_FEATURE_CYRIX_ARR, &boot_cpu_data.x86_capability) ||
178     		  test_bit(X86_FEATURE_CENTAUR_MCR, &boot_cpu_data.x86_capability) )
179     	  && addr >= __pa(high_memory);
180     #else
181     	return addr >= __pa(high_memory);
182     #endif
183     }
184     
185     static int mmap_mem(struct file * file, struct vm_area_struct * vma)
186     {
187     	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
188     
189     	/*
190     	 * Accessing memory above the top the kernel knows about or
191     	 * through a file pointer that was marked O_SYNC will be
192     	 * done non-cached.
193     	 */
194     	if (noncached_address(offset) || (file->f_flags & O_SYNC))
195     		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
196     
197     	/* Don't try to swap out physical pages.. */
198     	vma->vm_flags |= VM_RESERVED;
199     
200     	/*
201     	 * Don't dump addresses that are not real memory to a core file.
202     	 */
203     	if (offset >= __pa(high_memory) || (file->f_flags & O_SYNC))
204     		vma->vm_flags |= VM_IO;
205     
206     	if (remap_page_range(vma->vm_start, offset, vma->vm_end-vma->vm_start,
207     			     vma->vm_page_prot))
208     		return -EAGAIN;
209     	return 0;
210     }
211     
212     /*
213      * This function reads the *virtual* memory as seen by the kernel.
214      */
215     static ssize_t read_kmem(struct file *file, char *buf, 
216     			 size_t count, loff_t *ppos)
217     {
218     	unsigned long p = *ppos;
219     	ssize_t read = 0;
220     	ssize_t virtr = 0;
221     	char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
222     		
223     	if (p < (unsigned long) high_memory) {
224     		read = count;
225     		if (count > (unsigned long) high_memory - p)
226     			read = (unsigned long) high_memory - p;
227     
228     #if defined(__sparc__) || defined(__mc68000__)
229     		/* we don't have page 0 mapped on sparc and m68k.. */
230     		if (p < PAGE_SIZE && read > 0) {
231     			size_t tmp = PAGE_SIZE - p;
232     			if (tmp > read) tmp = read;
233     			if (clear_user(buf, tmp))
234     				return -EFAULT;
235     			buf += tmp;
236     			p += tmp;
237     			read -= tmp;
238     			count -= tmp;
239     		}
240     #endif
241     		if (copy_to_user(buf, (char *)p, read))
242     			return -EFAULT;
243     		p += read;
244     		buf += read;
245     		count -= read;
246     	}
247     
248     	if (count > 0) {
249     		kbuf = (char *)__get_free_page(GFP_KERNEL);
250     		if (!kbuf)
251     			return -ENOMEM;
252     		while (count > 0) {
253     			int len = count;
254     
255     			if (len > PAGE_SIZE)
256     				len = PAGE_SIZE;
257     			len = vread(kbuf, (char *)p, len);
258     			if (!len)
259     				break;
260     			if (copy_to_user(buf, kbuf, len)) {
261     				free_page((unsigned long)kbuf);
262     				return -EFAULT;
263     			}
264     			count -= len;
265     			buf += len;
266     			virtr += len;
267     			p += len;
268     		}
269     		free_page((unsigned long)kbuf);
270     	}
271      	*ppos = p;
272      	return virtr + read;
273     }
274     
275     /*
276      * This function writes to the *virtual* memory as seen by the kernel.
277      */
278     static ssize_t write_kmem(struct file * file, const char * buf, 
279     			  size_t count, loff_t *ppos)
280     {
281     	unsigned long p = *ppos;
282     
283     	if (p >= (unsigned long) high_memory)
284     		return 0;
285     	if (count > (unsigned long) high_memory - p)
286     		count = (unsigned long) high_memory - p;
287     	return do_write_mem(file, (void*)p, p, buf, count, ppos);
288     }
289     
290     #if !defined(__mc68000__)
291     static ssize_t read_port(struct file * file, char * buf,
292     			 size_t count, loff_t *ppos)
293     {
294     	unsigned long i = *ppos;
295     	char *tmp = buf;
296     
297     	if (verify_area(VERIFY_WRITE,buf,count))
298     		return -EFAULT; 
299     	while (count-- > 0 && i < 65536) {
300     		if (__put_user(inb(i),tmp) < 0) 
301     			return -EFAULT;  
302     		i++;
303     		tmp++;
304     	}
305     	*ppos = i;
306     	return tmp-buf;
307     }
308     
309     static ssize_t write_port(struct file * file, const char * buf,
310     			  size_t count, loff_t *ppos)
311     {
312     	unsigned long i = *ppos;
313     	const char * tmp = buf;
314     
315     	if (verify_area(VERIFY_READ,buf,count))
316     		return -EFAULT;
317     	while (count-- > 0 && i < 65536) {
318     		char c;
319     		if (__get_user(c, tmp)) 
320     			return -EFAULT; 
321     		outb(c,i);
322     		i++;
323     		tmp++;
324     	}
325     	*ppos = i;
326     	return tmp-buf;
327     }
328     #endif
329     
330     static ssize_t read_null(struct file * file, char * buf,
331     			 size_t count, loff_t *ppos)
332     {
333     	return 0;
334     }
335     
336     static ssize_t write_null(struct file * file, const char * buf,
337     			  size_t count, loff_t *ppos)
338     {
339     	return count;
340     }
341     
342     /*
343      * For fun, we are using the MMU for this.
344      */
345     static inline size_t read_zero_pagealigned(char * buf, size_t size)
346     {
347     	struct mm_struct *mm;
348     	struct vm_area_struct * vma;
349     	unsigned long addr=(unsigned long)buf;
350     
351     	mm = current->mm;
352     	/* Oops, this was forgotten before. -ben */
353     	down_read(&mm->mmap_sem);
354     
355     	/* For private mappings, just map in zero pages. */
356     	for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
357     		unsigned long count;
358     
359     		if (vma->vm_start > addr || (vma->vm_flags & VM_WRITE) == 0)
360     			goto out_up;
361     		if (vma->vm_flags & VM_SHARED)
362     			break;
363     		count = vma->vm_end - addr;
364     		if (count > size)
365     			count = size;
366     
367     		zap_page_range(mm, addr, count);
368             	zeromap_page_range(addr, count, PAGE_COPY);
369     
370     		size -= count;
371     		buf += count;
372     		addr += count;
373     		if (size == 0)
374     			goto out_up;
375     	}
376     
377     	up_read(&mm->mmap_sem);
378     	
379     	/* The shared case is hard. Let's do the conventional zeroing. */ 
380     	do {
381     		unsigned long unwritten = clear_user(buf, PAGE_SIZE);
382     		if (unwritten)
383     			return size + unwritten - PAGE_SIZE;
384     		if (current->need_resched)
385     			schedule();
386     		buf += PAGE_SIZE;
387     		size -= PAGE_SIZE;
388     	} while (size);
389     
390     	return size;
391     out_up:
392     	up_read(&mm->mmap_sem);
393     	return size;
394     }
395     
396     static ssize_t read_zero(struct file * file, char * buf, 
397     			 size_t count, loff_t *ppos)
398     {
399     	unsigned long left, unwritten, written = 0;
400     
401     	if (!count)
402     		return 0;
403     
404     	if (!access_ok(VERIFY_WRITE, buf, count))
405     		return -EFAULT;
406     
407     	left = count;
408     
409     	/* do we want to be clever? Arbitrary cut-off */
410     	if (count >= PAGE_SIZE*4) {
411     		unsigned long partial;
412     
413     		/* How much left of the page? */
414     		partial = (PAGE_SIZE-1) & -(unsigned long) buf;
415     		unwritten = clear_user(buf, partial);
416     		written = partial - unwritten;
417     		if (unwritten)
418     			goto out;
419     		left -= partial;
420     		buf += partial;
421     		unwritten = read_zero_pagealigned(buf, left & PAGE_MASK);
422     		written += (left & PAGE_MASK) - unwritten;
423     		if (unwritten)
424     			goto out;
425     		buf += left & PAGE_MASK;
426     		left &= ~PAGE_MASK;
427     	}
428     	unwritten = clear_user(buf, left);
429     	written += left - unwritten;
430     out:
431     	return written ? written : -EFAULT;
432     }
433     
434     static int mmap_zero(struct file * file, struct vm_area_struct * vma)
435     {
436     	if (vma->vm_flags & VM_SHARED)
437     		return shmem_zero_setup(vma);
438     	if (zeromap_page_range(vma->vm_start, vma->vm_end - vma->vm_start, vma->vm_page_prot))
439     		return -EAGAIN;
440     	return 0;
441     }
442     
443     static ssize_t write_full(struct file * file, const char * buf,
444     			  size_t count, loff_t *ppos)
445     {
446     	return -ENOSPC;
447     }
448     
449     /*
450      * Special lseek() function for /dev/null and /dev/zero.  Most notably, you
451      * can fopen() both devices with "a" now.  This was previously impossible.
452      * -- SRB.
453      */
454     
455     static loff_t null_lseek(struct file * file, loff_t offset, int orig)
456     {
457     	return file->f_pos = 0;
458     }
459     
460     /*
461      * The memory devices use the full 32/64 bits of the offset, and so we cannot
462      * check against negative addresses: they are ok. The return value is weird,
463      * though, in that case (0).
464      *
465      * also note that seeking relative to the "end of file" isn't supported:
466      * it has no meaning, so it returns -EINVAL.
467      */
468     static loff_t memory_lseek(struct file * file, loff_t offset, int orig)
469     {
470     	switch (orig) {
471     		case 0:
472     			file->f_pos = offset;
473     			return file->f_pos;
474     		case 1:
475     			file->f_pos += offset;
476     			return file->f_pos;
477     		default:
478     			return -EINVAL;
479     	}
480     }
481     
482     static int open_port(struct inode * inode, struct file * filp)
483     {
484     	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
485     }
486     
/*
 * Several devices share one implementation of an operation.  The aliases
 * below give each device's operation table its own descriptive names.
 */

/* the mem and kmem devices: same mmap and same open check as port */
#define open_mem	open_port
#define open_kmem	open_mem
#define mmap_kmem	mmap_mem

/* the zero device: seeks like null, writes are discarded like null */
#define zero_lseek	null_lseek
#define write_zero	write_null

/* the full device: seeks like null, reads behave like zero */
#define full_lseek	null_lseek
#define read_full	read_zero
494     
495     static struct file_operations mem_fops = {
496     	llseek:		memory_lseek,
497     	read:		read_mem,
498     	write:		write_mem,
499     	mmap:		mmap_mem,
500     	open:		open_mem,
501     };
502     
503     static struct file_operations kmem_fops = {
504     	llseek:		memory_lseek,
505     	read:		read_kmem,
506     	write:		write_kmem,
507     	mmap:		mmap_kmem,
508     	open:		open_kmem,
509     };
510     
511     static struct file_operations null_fops = {
512     	llseek:		null_lseek,
513     	read:		read_null,
514     	write:		write_null,
515     };
516     
517     #if !defined(__mc68000__)
518     static struct file_operations port_fops = {
519     	llseek:		memory_lseek,
520     	read:		read_port,
521     	write:		write_port,
522     	open:		open_port,
523     };
524     #endif
525     
526     static struct file_operations zero_fops = {
527     	llseek:		zero_lseek,
528     	read:		read_zero,
529     	write:		write_zero,
530     	mmap:		mmap_zero,
531     };
532     
533     static struct file_operations full_fops = {
534     	llseek:		full_lseek,
535     	read:		read_full,
536     	write:		write_full,
537     };
538     
539     static int memory_open(struct inode * inode, struct file * filp)
540     {
541     	switch (MINOR(inode->i_rdev)) {
542     		case 1:
543     			filp->f_op = &mem_fops;
544     			break;
545     		case 2:
546     			filp->f_op = &kmem_fops;
547     			break;
548     		case 3:
549     			filp->f_op = &null_fops;
550     			break;
551     #if !defined(__mc68000__)
552     		case 4:
553     			filp->f_op = &port_fops;
554     			break;
555     #endif
556     		case 5:
557     			filp->f_op = &zero_fops;
558     			break;
559     		case 7:
560     			filp->f_op = &full_fops;
561     			break;
562     		case 8:
563     			filp->f_op = &random_fops;
564     			break;
565     		case 9:
566     			filp->f_op = &urandom_fops;
567     			break;
568     		default:
569     			return -ENXIO;
570     	}
571     	if (filp->f_op && filp->f_op->open)
572     		return filp->f_op->open(inode,filp);
573     	return 0;
574     }
575     
576     void __init memory_devfs_register (void)
577     {
578         /*  These are never unregistered  */
579         static const struct {
580     	unsigned short minor;
581     	char *name;
582     	umode_t mode;
583     	struct file_operations *fops;
584         } list[] = { /* list of minor devices */
585     	{1, "mem",     S_IRUSR | S_IWUSR | S_IRGRP, &mem_fops},
586     	{2, "kmem",    S_IRUSR | S_IWUSR | S_IRGRP, &kmem_fops},
587     	{3, "null",    S_IRUGO | S_IWUGO,           &null_fops},
588     	{4, "port",    S_IRUSR | S_IWUSR | S_IRGRP, &port_fops},
589     	{5, "zero",    S_IRUGO | S_IWUGO,           &zero_fops},
590     	{7, "full",    S_IRUGO | S_IWUGO,           &full_fops},
591     	{8, "random",  S_IRUGO | S_IWUSR,           &random_fops},
592     	{9, "urandom", S_IRUGO | S_IWUSR,           &urandom_fops}
593         };
594         int i;
595     
596         for (i=0; i<(sizeof(list)/sizeof(*list)); i++)
597     	devfs_register (NULL, list[i].name, DEVFS_FL_NONE,
598     			MEM_MAJOR, list[i].minor,
599     			list[i].mode | S_IFCHR,
600     			list[i].fops, NULL);
601     }
602     
603     static struct file_operations memory_fops = {
604     	open:		memory_open,	/* just a selector for the real open */
605     };
606     
607     int __init chr_dev_init(void)
608     {
609     	if (devfs_register_chrdev(MEM_MAJOR,"mem",&memory_fops))
610     		printk("unable to get major %d for memory devs\n", MEM_MAJOR);
611     	memory_devfs_register();
612     	rand_initialize();
613     #ifdef CONFIG_I2C
614     	i2c_init_all();
615     #endif
616     #if defined (CONFIG_FB)
617     	fbmem_init();
618     #endif
619     #if defined (CONFIG_PROM_CONSOLE)
620     	prom_con_init();
621     #endif
622     #if defined (CONFIG_MDA_CONSOLE)
623     	mda_console_init();
624     #endif
625     	tty_init();
626     #ifdef CONFIG_M68K_PRINTER
627     	lp_m68k_init();
628     #endif
629     	misc_init();
630     #if CONFIG_QIC02_TAPE
631     	qic02_tape_init();
632     #endif
633     #ifdef CONFIG_FTAPE
634     	ftape_init();
635     #endif
636     #if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_CHAR)
637     	tapechar_init();
638     #endif
639     	return 0;
640     }
641     
642     __initcall(chr_dev_init);
643