git.openfabrics.org - ~shefty/rdma-dev.git/blob - drivers/xen/xenfs/privcmd.c
Linux 2.6.37-rc2
[~shefty/rdma-dev.git] / drivers / xen / xenfs / privcmd.c
1 /******************************************************************************
2  * privcmd.c
3  *
4  * Interface to privileged domain-0 commands.
5  *
6  * Copyright (c) 2002-2004, K A Fraser, B Dragovic
7  */
8
9 #include <linux/kernel.h>
10 #include <linux/sched.h>
11 #include <linux/slab.h>
12 #include <linux/string.h>
13 #include <linux/errno.h>
14 #include <linux/mm.h>
15 #include <linux/mman.h>
16 #include <linux/uaccess.h>
17 #include <linux/swap.h>
18 #include <linux/smp_lock.h>
19 #include <linux/highmem.h>
20 #include <linux/pagemap.h>
21 #include <linux/seq_file.h>
22
23 #include <asm/pgalloc.h>
24 #include <asm/pgtable.h>
25 #include <asm/tlb.h>
26 #include <asm/xen/hypervisor.h>
27 #include <asm/xen/hypercall.h>
28
29 #include <xen/xen.h>
30 #include <xen/privcmd.h>
31 #include <xen/interface/xen.h>
32 #include <xen/features.h>
33 #include <xen/page.h>
34 #include <xen/xen-ops.h>
35
36 #ifndef HAVE_ARCH_PRIVCMD_MMAP
37 static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
38 #endif
39
40 static long privcmd_ioctl_hypercall(void __user *udata)
41 {
42         struct privcmd_hypercall hypercall;
43         long ret;
44
45         if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
46                 return -EFAULT;
47
48         ret = privcmd_call(hypercall.op,
49                            hypercall.arg[0], hypercall.arg[1],
50                            hypercall.arg[2], hypercall.arg[3],
51                            hypercall.arg[4]);
52
53         return ret;
54 }
55
56 static void free_page_list(struct list_head *pages)
57 {
58         struct page *p, *n;
59
60         list_for_each_entry_safe(p, n, pages, lru)
61                 __free_page(p);
62
63         INIT_LIST_HEAD(pages);
64 }
65
66 /*
67  * Given an array of items in userspace, return a list of pages
68  * containing the data.  If copying fails, either because of memory
69  * allocation failure or a problem reading user memory, return an
70  * error code; its up to the caller to dispose of any partial list.
71  */
72 static int gather_array(struct list_head *pagelist,
73                         unsigned nelem, size_t size,
74                         void __user *data)
75 {
76         unsigned pageidx;
77         void *pagedata;
78         int ret;
79
80         if (size > PAGE_SIZE)
81                 return 0;
82
83         pageidx = PAGE_SIZE;
84         pagedata = NULL;        /* quiet, gcc */
85         while (nelem--) {
86                 if (pageidx > PAGE_SIZE-size) {
87                         struct page *page = alloc_page(GFP_KERNEL);
88
89                         ret = -ENOMEM;
90                         if (page == NULL)
91                                 goto fail;
92
93                         pagedata = page_address(page);
94
95                         list_add_tail(&page->lru, pagelist);
96                         pageidx = 0;
97                 }
98
99                 ret = -EFAULT;
100                 if (copy_from_user(pagedata + pageidx, data, size))
101                         goto fail;
102
103                 data += size;
104                 pageidx += size;
105         }
106
107         ret = 0;
108
109 fail:
110         return ret;
111 }
112
113 /*
114  * Call function "fn" on each element of the array fragmented
115  * over a list of pages.
116  */
117 static int traverse_pages(unsigned nelem, size_t size,
118                           struct list_head *pos,
119                           int (*fn)(void *data, void *state),
120                           void *state)
121 {
122         void *pagedata;
123         unsigned pageidx;
124         int ret = 0;
125
126         BUG_ON(size > PAGE_SIZE);
127
128         pageidx = PAGE_SIZE;
129         pagedata = NULL;        /* hush, gcc */
130
131         while (nelem--) {
132                 if (pageidx > PAGE_SIZE-size) {
133                         struct page *page;
134                         pos = pos->next;
135                         page = list_entry(pos, struct page, lru);
136                         pagedata = page_address(page);
137                         pageidx = 0;
138                 }
139
140                 ret = (*fn)(pagedata + pageidx, state);
141                 if (ret)
142                         break;
143                 pageidx += size;
144         }
145
146         return ret;
147 }
148
149 struct mmap_mfn_state {
150         unsigned long va;
151         struct vm_area_struct *vma;
152         domid_t domain;
153 };
154
155 static int mmap_mfn_range(void *data, void *state)
156 {
157         struct privcmd_mmap_entry *msg = data;
158         struct mmap_mfn_state *st = state;
159         struct vm_area_struct *vma = st->vma;
160         int rc;
161
162         /* Do not allow range to wrap the address space. */
163         if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
164             ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
165                 return -EINVAL;
166
167         /* Range chunks must be contiguous in va space. */
168         if ((msg->va != st->va) ||
169             ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
170                 return -EINVAL;
171
172         rc = xen_remap_domain_mfn_range(vma,
173                                         msg->va & PAGE_MASK,
174                                         msg->mfn, msg->npages,
175                                         vma->vm_page_prot,
176                                         st->domain);
177         if (rc < 0)
178                 return rc;
179
180         st->va += msg->npages << PAGE_SHIFT;
181
182         return 0;
183 }
184
185 static long privcmd_ioctl_mmap(void __user *udata)
186 {
187         struct privcmd_mmap mmapcmd;
188         struct mm_struct *mm = current->mm;
189         struct vm_area_struct *vma;
190         int rc;
191         LIST_HEAD(pagelist);
192         struct mmap_mfn_state state;
193
194         if (!xen_initial_domain())
195                 return -EPERM;
196
197         if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
198                 return -EFAULT;
199
200         rc = gather_array(&pagelist,
201                           mmapcmd.num, sizeof(struct privcmd_mmap_entry),
202                           mmapcmd.entry);
203
204         if (rc || list_empty(&pagelist))
205                 goto out;
206
207         down_write(&mm->mmap_sem);
208
209         {
210                 struct page *page = list_first_entry(&pagelist,
211                                                      struct page, lru);
212                 struct privcmd_mmap_entry *msg = page_address(page);
213
214                 vma = find_vma(mm, msg->va);
215                 rc = -EINVAL;
216
217                 if (!vma || (msg->va != vma->vm_start) ||
218                     !privcmd_enforce_singleshot_mapping(vma))
219                         goto out_up;
220         }
221
222         state.va = vma->vm_start;
223         state.vma = vma;
224         state.domain = mmapcmd.dom;
225
226         rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
227                             &pagelist,
228                             mmap_mfn_range, &state);
229
230
231 out_up:
232         up_write(&mm->mmap_sem);
233
234 out:
235         free_page_list(&pagelist);
236
237         return rc;
238 }
239
240 struct mmap_batch_state {
241         domid_t domain;
242         unsigned long va;
243         struct vm_area_struct *vma;
244         int err;
245
246         xen_pfn_t __user *user;
247 };
248
249 static int mmap_batch_fn(void *data, void *state)
250 {
251         xen_pfn_t *mfnp = data;
252         struct mmap_batch_state *st = state;
253
254         if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
255                                        st->vma->vm_page_prot, st->domain) < 0) {
256                 *mfnp |= 0xf0000000U;
257                 st->err++;
258         }
259         st->va += PAGE_SIZE;
260
261         return 0;
262 }
263
264 static int mmap_return_errors(void *data, void *state)
265 {
266         xen_pfn_t *mfnp = data;
267         struct mmap_batch_state *st = state;
268
269         put_user(*mfnp, st->user++);
270
271         return 0;
272 }
273
274 static struct vm_operations_struct privcmd_vm_ops;
275
276 static long privcmd_ioctl_mmap_batch(void __user *udata)
277 {
278         int ret;
279         struct privcmd_mmapbatch m;
280         struct mm_struct *mm = current->mm;
281         struct vm_area_struct *vma;
282         unsigned long nr_pages;
283         LIST_HEAD(pagelist);
284         struct mmap_batch_state state;
285
286         if (!xen_initial_domain())
287                 return -EPERM;
288
289         if (copy_from_user(&m, udata, sizeof(m)))
290                 return -EFAULT;
291
292         nr_pages = m.num;
293         if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
294                 return -EINVAL;
295
296         ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
297                            m.arr);
298
299         if (ret || list_empty(&pagelist))
300                 goto out;
301
302         down_write(&mm->mmap_sem);
303
304         vma = find_vma(mm, m.addr);
305         ret = -EINVAL;
306         if (!vma ||
307             vma->vm_ops != &privcmd_vm_ops ||
308             (m.addr != vma->vm_start) ||
309             ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
310             !privcmd_enforce_singleshot_mapping(vma)) {
311                 up_write(&mm->mmap_sem);
312                 goto out;
313         }
314
315         state.domain = m.dom;
316         state.vma = vma;
317         state.va = m.addr;
318         state.err = 0;
319
320         ret = traverse_pages(m.num, sizeof(xen_pfn_t),
321                              &pagelist, mmap_batch_fn, &state);
322
323         up_write(&mm->mmap_sem);
324
325         if (state.err > 0) {
326                 ret = 0;
327
328                 state.user = m.arr;
329                 traverse_pages(m.num, sizeof(xen_pfn_t),
330                                &pagelist,
331                                mmap_return_errors, &state);
332         }
333
334 out:
335         free_page_list(&pagelist);
336
337         return ret;
338 }
339
340 static long privcmd_ioctl(struct file *file,
341                           unsigned int cmd, unsigned long data)
342 {
343         int ret = -ENOSYS;
344         void __user *udata = (void __user *) data;
345
346         switch (cmd) {
347         case IOCTL_PRIVCMD_HYPERCALL:
348                 ret = privcmd_ioctl_hypercall(udata);
349                 break;
350
351         case IOCTL_PRIVCMD_MMAP:
352                 ret = privcmd_ioctl_mmap(udata);
353                 break;
354
355         case IOCTL_PRIVCMD_MMAPBATCH:
356                 ret = privcmd_ioctl_mmap_batch(udata);
357                 break;
358
359         default:
360                 ret = -EINVAL;
361                 break;
362         }
363
364         return ret;
365 }
366
367 #ifndef HAVE_ARCH_PRIVCMD_MMAP
368 static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
369 {
370         printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
371                vma, vma->vm_start, vma->vm_end,
372                vmf->pgoff, vmf->virtual_address);
373
374         return VM_FAULT_SIGBUS;
375 }
376
377 static struct vm_operations_struct privcmd_vm_ops = {
378         .fault = privcmd_fault
379 };
380
381 static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
382 {
383         /* Unsupported for auto-translate guests. */
384         if (xen_feature(XENFEAT_auto_translated_physmap))
385                 return -ENOSYS;
386
387         /* DONTCOPY is essential for Xen as copy_page_range is broken. */
388         vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
389         vma->vm_ops = &privcmd_vm_ops;
390         vma->vm_private_data = NULL;
391
392         return 0;
393 }
394
395 static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
396 {
397         return (xchg(&vma->vm_private_data, (void *)1) == NULL);
398 }
399 #endif
400
401 const struct file_operations privcmd_file_ops = {
402         .unlocked_ioctl = privcmd_ioctl,
403         .mmap = privcmd_mmap,
404 };