]> git.openfabrics.org - ~shefty/rdma-dev.git/blob - fs/btrfs/free-space-cache.c
6c7887a7770c08946c23cb0af3adc0f2f6e76a79
[~shefty/rdma-dev.git] / fs / btrfs / free-space-cache.c
1 /*
2  * Copyright (C) 2008 Red Hat.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/pagemap.h>
20 #include <linux/sched.h>
21 #include <linux/slab.h>
22 #include <linux/math64.h>
23 #include <linux/ratelimit.h>
24 #include "ctree.h"
25 #include "free-space-cache.h"
26 #include "transaction.h"
27 #include "disk-io.h"
28 #include "extent_io.h"
29 #include "inode-map.h"
30
31 #define BITS_PER_BITMAP         (PAGE_CACHE_SIZE * 8)
32 #define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
33
34 static int link_free_space(struct btrfs_free_space_ctl *ctl,
35                            struct btrfs_free_space *info);
36
37 static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
38                                                struct btrfs_path *path,
39                                                u64 offset)
40 {
41         struct btrfs_key key;
42         struct btrfs_key location;
43         struct btrfs_disk_key disk_key;
44         struct btrfs_free_space_header *header;
45         struct extent_buffer *leaf;
46         struct inode *inode = NULL;
47         int ret;
48
49         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
50         key.offset = offset;
51         key.type = 0;
52
53         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
54         if (ret < 0)
55                 return ERR_PTR(ret);
56         if (ret > 0) {
57                 btrfs_release_path(path);
58                 return ERR_PTR(-ENOENT);
59         }
60
61         leaf = path->nodes[0];
62         header = btrfs_item_ptr(leaf, path->slots[0],
63                                 struct btrfs_free_space_header);
64         btrfs_free_space_key(leaf, header, &disk_key);
65         btrfs_disk_key_to_cpu(&location, &disk_key);
66         btrfs_release_path(path);
67
68         inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
69         if (!inode)
70                 return ERR_PTR(-ENOENT);
71         if (IS_ERR(inode))
72                 return inode;
73         if (is_bad_inode(inode)) {
74                 iput(inode);
75                 return ERR_PTR(-ENOENT);
76         }
77
78         inode->i_mapping->flags &= ~__GFP_FS;
79
80         return inode;
81 }
82
83 struct inode *lookup_free_space_inode(struct btrfs_root *root,
84                                       struct btrfs_block_group_cache
85                                       *block_group, struct btrfs_path *path)
86 {
87         struct inode *inode = NULL;
88         u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
89
90         spin_lock(&block_group->lock);
91         if (block_group->inode)
92                 inode = igrab(block_group->inode);
93         spin_unlock(&block_group->lock);
94         if (inode)
95                 return inode;
96
97         inode = __lookup_free_space_inode(root, path,
98                                           block_group->key.objectid);
99         if (IS_ERR(inode))
100                 return inode;
101
102         spin_lock(&block_group->lock);
103         if (!((BTRFS_I(inode)->flags & flags) == flags)) {
104                 printk(KERN_INFO "Old style space inode found, converting.\n");
105                 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
106                         BTRFS_INODE_NODATACOW;
107                 block_group->disk_cache_state = BTRFS_DC_CLEAR;
108         }
109
110         if (!block_group->iref) {
111                 block_group->inode = igrab(inode);
112                 block_group->iref = 1;
113         }
114         spin_unlock(&block_group->lock);
115
116         return inode;
117 }
118
119 int __create_free_space_inode(struct btrfs_root *root,
120                               struct btrfs_trans_handle *trans,
121                               struct btrfs_path *path, u64 ino, u64 offset)
122 {
123         struct btrfs_key key;
124         struct btrfs_disk_key disk_key;
125         struct btrfs_free_space_header *header;
126         struct btrfs_inode_item *inode_item;
127         struct extent_buffer *leaf;
128         u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
129         int ret;
130
131         ret = btrfs_insert_empty_inode(trans, root, path, ino);
132         if (ret)
133                 return ret;
134
135         /* We inline crc's for the free disk space cache */
136         if (ino != BTRFS_FREE_INO_OBJECTID)
137                 flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
138
139         leaf = path->nodes[0];
140         inode_item = btrfs_item_ptr(leaf, path->slots[0],
141                                     struct btrfs_inode_item);
142         btrfs_item_key(leaf, &disk_key, path->slots[0]);
143         memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
144                              sizeof(*inode_item));
145         btrfs_set_inode_generation(leaf, inode_item, trans->transid);
146         btrfs_set_inode_size(leaf, inode_item, 0);
147         btrfs_set_inode_nbytes(leaf, inode_item, 0);
148         btrfs_set_inode_uid(leaf, inode_item, 0);
149         btrfs_set_inode_gid(leaf, inode_item, 0);
150         btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
151         btrfs_set_inode_flags(leaf, inode_item, flags);
152         btrfs_set_inode_nlink(leaf, inode_item, 1);
153         btrfs_set_inode_transid(leaf, inode_item, trans->transid);
154         btrfs_set_inode_block_group(leaf, inode_item, offset);
155         btrfs_mark_buffer_dirty(leaf);
156         btrfs_release_path(path);
157
158         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
159         key.offset = offset;
160         key.type = 0;
161
162         ret = btrfs_insert_empty_item(trans, root, path, &key,
163                                       sizeof(struct btrfs_free_space_header));
164         if (ret < 0) {
165                 btrfs_release_path(path);
166                 return ret;
167         }
168         leaf = path->nodes[0];
169         header = btrfs_item_ptr(leaf, path->slots[0],
170                                 struct btrfs_free_space_header);
171         memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
172         btrfs_set_free_space_key(leaf, header, &disk_key);
173         btrfs_mark_buffer_dirty(leaf);
174         btrfs_release_path(path);
175
176         return 0;
177 }
178
179 int create_free_space_inode(struct btrfs_root *root,
180                             struct btrfs_trans_handle *trans,
181                             struct btrfs_block_group_cache *block_group,
182                             struct btrfs_path *path)
183 {
184         int ret;
185         u64 ino;
186
187         ret = btrfs_find_free_objectid(root, &ino);
188         if (ret < 0)
189                 return ret;
190
191         return __create_free_space_inode(root, trans, path, ino,
192                                          block_group->key.objectid);
193 }
194
195 int btrfs_truncate_free_space_cache(struct btrfs_root *root,
196                                     struct btrfs_trans_handle *trans,
197                                     struct btrfs_path *path,
198                                     struct inode *inode)
199 {
200         struct btrfs_block_rsv *rsv;
201         u64 needed_bytes;
202         loff_t oldsize;
203         int ret = 0;
204
205         rsv = trans->block_rsv;
206         trans->block_rsv = &root->fs_info->global_block_rsv;
207
208         /* 1 for slack space, 1 for updating the inode */
209         needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) +
210                 btrfs_calc_trans_metadata_size(root, 1);
211
212         spin_lock(&trans->block_rsv->lock);
213         if (trans->block_rsv->reserved < needed_bytes) {
214                 spin_unlock(&trans->block_rsv->lock);
215                 trans->block_rsv = rsv;
216                 return -ENOSPC;
217         }
218         spin_unlock(&trans->block_rsv->lock);
219
220         oldsize = i_size_read(inode);
221         btrfs_i_size_write(inode, 0);
222         truncate_pagecache(inode, oldsize, 0);
223
224         /*
225          * We don't need an orphan item because truncating the free space cache
226          * will never be split across transactions.
227          */
228         ret = btrfs_truncate_inode_items(trans, root, inode,
229                                          0, BTRFS_EXTENT_DATA_KEY);
230
231         if (ret) {
232                 trans->block_rsv = rsv;
233                 WARN_ON(1);
234                 return ret;
235         }
236
237         ret = btrfs_update_inode(trans, root, inode);
238         trans->block_rsv = rsv;
239
240         return ret;
241 }
242
243 static int readahead_cache(struct inode *inode)
244 {
245         struct file_ra_state *ra;
246         unsigned long last_index;
247
248         ra = kzalloc(sizeof(*ra), GFP_NOFS);
249         if (!ra)
250                 return -ENOMEM;
251
252         file_ra_state_init(ra, inode->i_mapping);
253         last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
254
255         page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
256
257         kfree(ra);
258
259         return 0;
260 }
261
262 struct io_ctl {
263         void *cur, *orig;
264         struct page *page;
265         struct page **pages;
266         struct btrfs_root *root;
267         unsigned long size;
268         int index;
269         int num_pages;
270         unsigned check_crcs:1;
271 };
272
273 static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
274                        struct btrfs_root *root)
275 {
276         memset(io_ctl, 0, sizeof(struct io_ctl));
277         io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
278                 PAGE_CACHE_SHIFT;
279         io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages,
280                                 GFP_NOFS);
281         if (!io_ctl->pages)
282                 return -ENOMEM;
283         io_ctl->root = root;
284         if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID)
285                 io_ctl->check_crcs = 1;
286         return 0;
287 }
288
289 static void io_ctl_free(struct io_ctl *io_ctl)
290 {
291         kfree(io_ctl->pages);
292 }
293
294 static void io_ctl_unmap_page(struct io_ctl *io_ctl)
295 {
296         if (io_ctl->cur) {
297                 kunmap(io_ctl->page);
298                 io_ctl->cur = NULL;
299                 io_ctl->orig = NULL;
300         }
301 }
302
303 static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
304 {
305         WARN_ON(io_ctl->cur);
306         BUG_ON(io_ctl->index >= io_ctl->num_pages);
307         io_ctl->page = io_ctl->pages[io_ctl->index++];
308         io_ctl->cur = kmap(io_ctl->page);
309         io_ctl->orig = io_ctl->cur;
310         io_ctl->size = PAGE_CACHE_SIZE;
311         if (clear)
312                 memset(io_ctl->cur, 0, PAGE_CACHE_SIZE);
313 }
314
315 static void io_ctl_drop_pages(struct io_ctl *io_ctl)
316 {
317         int i;
318
319         io_ctl_unmap_page(io_ctl);
320
321         for (i = 0; i < io_ctl->num_pages; i++) {
322                 if (io_ctl->pages[i]) {
323                         ClearPageChecked(io_ctl->pages[i]);
324                         unlock_page(io_ctl->pages[i]);
325                         page_cache_release(io_ctl->pages[i]);
326                 }
327         }
328 }
329
330 static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
331                                 int uptodate)
332 {
333         struct page *page;
334         gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
335         int i;
336
337         for (i = 0; i < io_ctl->num_pages; i++) {
338                 page = find_or_create_page(inode->i_mapping, i, mask);
339                 if (!page) {
340                         io_ctl_drop_pages(io_ctl);
341                         return -ENOMEM;
342                 }
343                 io_ctl->pages[i] = page;
344                 if (uptodate && !PageUptodate(page)) {
345                         btrfs_readpage(NULL, page);
346                         lock_page(page);
347                         if (!PageUptodate(page)) {
348                                 printk(KERN_ERR "btrfs: error reading free "
349                                        "space cache\n");
350                                 io_ctl_drop_pages(io_ctl);
351                                 return -EIO;
352                         }
353                 }
354         }
355
356         for (i = 0; i < io_ctl->num_pages; i++) {
357                 clear_page_dirty_for_io(io_ctl->pages[i]);
358                 set_page_extent_mapped(io_ctl->pages[i]);
359         }
360
361         return 0;
362 }
363
364 static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation)
365 {
366         u64 *val;
367
368         io_ctl_map_page(io_ctl, 1);
369
370         /*
371          * Skip the csum areas.  If we don't check crcs then we just have a
372          * 64bit chunk at the front of the first page.
373          */
374         if (io_ctl->check_crcs) {
375                 io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
376                 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
377         } else {
378                 io_ctl->cur += sizeof(u64);
379                 io_ctl->size -= sizeof(u64) * 2;
380         }
381
382         val = io_ctl->cur;
383         *val = cpu_to_le64(generation);
384         io_ctl->cur += sizeof(u64);
385 }
386
387 static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
388 {
389         u64 *gen;
390
391         /*
392          * Skip the crc area.  If we don't check crcs then we just have a 64bit
393          * chunk at the front of the first page.
394          */
395         if (io_ctl->check_crcs) {
396                 io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
397                 io_ctl->size -= sizeof(u64) +
398                         (sizeof(u32) * io_ctl->num_pages);
399         } else {
400                 io_ctl->cur += sizeof(u64);
401                 io_ctl->size -= sizeof(u64) * 2;
402         }
403
404         gen = io_ctl->cur;
405         if (le64_to_cpu(*gen) != generation) {
406                 printk_ratelimited(KERN_ERR "btrfs: space cache generation "
407                                    "(%Lu) does not match inode (%Lu)\n", *gen,
408                                    generation);
409                 io_ctl_unmap_page(io_ctl);
410                 return -EIO;
411         }
412         io_ctl->cur += sizeof(u64);
413         return 0;
414 }
415
416 static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
417 {
418         u32 *tmp;
419         u32 crc = ~(u32)0;
420         unsigned offset = 0;
421
422         if (!io_ctl->check_crcs) {
423                 io_ctl_unmap_page(io_ctl);
424                 return;
425         }
426
427         if (index == 0)
428                 offset = sizeof(u32) * io_ctl->num_pages;;
429
430         crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
431                               PAGE_CACHE_SIZE - offset);
432         btrfs_csum_final(crc, (char *)&crc);
433         io_ctl_unmap_page(io_ctl);
434         tmp = kmap(io_ctl->pages[0]);
435         tmp += index;
436         *tmp = crc;
437         kunmap(io_ctl->pages[0]);
438 }
439
440 static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
441 {
442         u32 *tmp, val;
443         u32 crc = ~(u32)0;
444         unsigned offset = 0;
445
446         if (!io_ctl->check_crcs) {
447                 io_ctl_map_page(io_ctl, 0);
448                 return 0;
449         }
450
451         if (index == 0)
452                 offset = sizeof(u32) * io_ctl->num_pages;
453
454         tmp = kmap(io_ctl->pages[0]);
455         tmp += index;
456         val = *tmp;
457         kunmap(io_ctl->pages[0]);
458
459         io_ctl_map_page(io_ctl, 0);
460         crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
461                               PAGE_CACHE_SIZE - offset);
462         btrfs_csum_final(crc, (char *)&crc);
463         if (val != crc) {
464                 printk_ratelimited(KERN_ERR "btrfs: csum mismatch on free "
465                                    "space cache\n");
466                 io_ctl_unmap_page(io_ctl);
467                 return -EIO;
468         }
469
470         return 0;
471 }
472
473 static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes,
474                             void *bitmap)
475 {
476         struct btrfs_free_space_entry *entry;
477
478         if (!io_ctl->cur)
479                 return -ENOSPC;
480
481         entry = io_ctl->cur;
482         entry->offset = cpu_to_le64(offset);
483         entry->bytes = cpu_to_le64(bytes);
484         entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
485                 BTRFS_FREE_SPACE_EXTENT;
486         io_ctl->cur += sizeof(struct btrfs_free_space_entry);
487         io_ctl->size -= sizeof(struct btrfs_free_space_entry);
488
489         if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
490                 return 0;
491
492         io_ctl_set_crc(io_ctl, io_ctl->index - 1);
493
494         /* No more pages to map */
495         if (io_ctl->index >= io_ctl->num_pages)
496                 return 0;
497
498         /* map the next page */
499         io_ctl_map_page(io_ctl, 1);
500         return 0;
501 }
502
503 static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap)
504 {
505         if (!io_ctl->cur)
506                 return -ENOSPC;
507
508         /*
509          * If we aren't at the start of the current page, unmap this one and
510          * map the next one if there is any left.
511          */
512         if (io_ctl->cur != io_ctl->orig) {
513                 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
514                 if (io_ctl->index >= io_ctl->num_pages)
515                         return -ENOSPC;
516                 io_ctl_map_page(io_ctl, 0);
517         }
518
519         memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE);
520         io_ctl_set_crc(io_ctl, io_ctl->index - 1);
521         if (io_ctl->index < io_ctl->num_pages)
522                 io_ctl_map_page(io_ctl, 0);
523         return 0;
524 }
525
526 static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl)
527 {
528         /*
529          * If we're not on the boundary we know we've modified the page and we
530          * need to crc the page.
531          */
532         if (io_ctl->cur != io_ctl->orig)
533                 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
534         else
535                 io_ctl_unmap_page(io_ctl);
536
537         while (io_ctl->index < io_ctl->num_pages) {
538                 io_ctl_map_page(io_ctl, 1);
539                 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
540         }
541 }
542
543 static int io_ctl_read_entry(struct io_ctl *io_ctl,
544                             struct btrfs_free_space *entry, u8 *type)
545 {
546         struct btrfs_free_space_entry *e;
547         int ret;
548
549         if (!io_ctl->cur) {
550                 ret = io_ctl_check_crc(io_ctl, io_ctl->index);
551                 if (ret)
552                         return ret;
553         }
554
555         e = io_ctl->cur;
556         entry->offset = le64_to_cpu(e->offset);
557         entry->bytes = le64_to_cpu(e->bytes);
558         *type = e->type;
559         io_ctl->cur += sizeof(struct btrfs_free_space_entry);
560         io_ctl->size -= sizeof(struct btrfs_free_space_entry);
561
562         if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
563                 return 0;
564
565         io_ctl_unmap_page(io_ctl);
566
567         return 0;
568 }
569
570 static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
571                               struct btrfs_free_space *entry)
572 {
573         int ret;
574
575         ret = io_ctl_check_crc(io_ctl, io_ctl->index);
576         if (ret)
577                 return ret;
578
579         memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE);
580         io_ctl_unmap_page(io_ctl);
581
582         return 0;
583 }
584
585 int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
586                             struct btrfs_free_space_ctl *ctl,
587                             struct btrfs_path *path, u64 offset)
588 {
589         struct btrfs_free_space_header *header;
590         struct extent_buffer *leaf;
591         struct io_ctl io_ctl;
592         struct btrfs_key key;
593         struct btrfs_free_space *e, *n;
594         struct list_head bitmaps;
595         u64 num_entries;
596         u64 num_bitmaps;
597         u64 generation;
598         u8 type;
599         int ret = 0;
600
601         INIT_LIST_HEAD(&bitmaps);
602
603         /* Nothing in the space cache, goodbye */
604         if (!i_size_read(inode))
605                 return 0;
606
607         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
608         key.offset = offset;
609         key.type = 0;
610
611         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
612         if (ret < 0)
613                 return 0;
614         else if (ret > 0) {
615                 btrfs_release_path(path);
616                 return 0;
617         }
618
619         ret = -1;
620
621         leaf = path->nodes[0];
622         header = btrfs_item_ptr(leaf, path->slots[0],
623                                 struct btrfs_free_space_header);
624         num_entries = btrfs_free_space_entries(leaf, header);
625         num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
626         generation = btrfs_free_space_generation(leaf, header);
627         btrfs_release_path(path);
628
629         if (BTRFS_I(inode)->generation != generation) {
630                 printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
631                        " not match free space cache generation (%llu)\n",
632                        (unsigned long long)BTRFS_I(inode)->generation,
633                        (unsigned long long)generation);
634                 return 0;
635         }
636
637         if (!num_entries)
638                 return 0;
639
640         ret = io_ctl_init(&io_ctl, inode, root);
641         if (ret)
642                 return ret;
643
644         ret = readahead_cache(inode);
645         if (ret)
646                 goto out;
647
648         ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
649         if (ret)
650                 goto out;
651
652         ret = io_ctl_check_crc(&io_ctl, 0);
653         if (ret)
654                 goto free_cache;
655
656         ret = io_ctl_check_generation(&io_ctl, generation);
657         if (ret)
658                 goto free_cache;
659
660         while (num_entries) {
661                 e = kmem_cache_zalloc(btrfs_free_space_cachep,
662                                       GFP_NOFS);
663                 if (!e)
664                         goto free_cache;
665
666                 ret = io_ctl_read_entry(&io_ctl, e, &type);
667                 if (ret) {
668                         kmem_cache_free(btrfs_free_space_cachep, e);
669                         goto free_cache;
670                 }
671
672                 if (!e->bytes) {
673                         kmem_cache_free(btrfs_free_space_cachep, e);
674                         goto free_cache;
675                 }
676
677                 if (type == BTRFS_FREE_SPACE_EXTENT) {
678                         spin_lock(&ctl->tree_lock);
679                         ret = link_free_space(ctl, e);
680                         spin_unlock(&ctl->tree_lock);
681                         if (ret) {
682                                 printk(KERN_ERR "Duplicate entries in "
683                                        "free space cache, dumping\n");
684                                 kmem_cache_free(btrfs_free_space_cachep, e);
685                                 goto free_cache;
686                         }
687                 } else {
688                         BUG_ON(!num_bitmaps);
689                         num_bitmaps--;
690                         e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
691                         if (!e->bitmap) {
692                                 kmem_cache_free(
693                                         btrfs_free_space_cachep, e);
694                                 goto free_cache;
695                         }
696                         spin_lock(&ctl->tree_lock);
697                         ret = link_free_space(ctl, e);
698                         ctl->total_bitmaps++;
699                         ctl->op->recalc_thresholds(ctl);
700                         spin_unlock(&ctl->tree_lock);
701                         if (ret) {
702                                 printk(KERN_ERR "Duplicate entries in "
703                                        "free space cache, dumping\n");
704                                 kmem_cache_free(btrfs_free_space_cachep, e);
705                                 goto free_cache;
706                         }
707                         list_add_tail(&e->list, &bitmaps);
708                 }
709
710                 num_entries--;
711         }
712
713         io_ctl_unmap_page(&io_ctl);
714
715         /*
716          * We add the bitmaps at the end of the entries in order that
717          * the bitmap entries are added to the cache.
718          */
719         list_for_each_entry_safe(e, n, &bitmaps, list) {
720                 list_del_init(&e->list);
721                 ret = io_ctl_read_bitmap(&io_ctl, e);
722                 if (ret)
723                         goto free_cache;
724         }
725
726         io_ctl_drop_pages(&io_ctl);
727         ret = 1;
728 out:
729         io_ctl_free(&io_ctl);
730         return ret;
731 free_cache:
732         io_ctl_drop_pages(&io_ctl);
733         __btrfs_remove_free_space_cache(ctl);
734         goto out;
735 }
736
737 int load_free_space_cache(struct btrfs_fs_info *fs_info,
738                           struct btrfs_block_group_cache *block_group)
739 {
740         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
741         struct btrfs_root *root = fs_info->tree_root;
742         struct inode *inode;
743         struct btrfs_path *path;
744         int ret = 0;
745         bool matched;
746         u64 used = btrfs_block_group_used(&block_group->item);
747
748         /*
749          * If we're unmounting then just return, since this does a search on the
750          * normal root and not the commit root and we could deadlock.
751          */
752         if (btrfs_fs_closing(fs_info))
753                 return 0;
754
755         /*
756          * If this block group has been marked to be cleared for one reason or
757          * another then we can't trust the on disk cache, so just return.
758          */
759         spin_lock(&block_group->lock);
760         if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
761                 spin_unlock(&block_group->lock);
762                 return 0;
763         }
764         spin_unlock(&block_group->lock);
765
766         path = btrfs_alloc_path();
767         if (!path)
768                 return 0;
769
770         inode = lookup_free_space_inode(root, block_group, path);
771         if (IS_ERR(inode)) {
772                 btrfs_free_path(path);
773                 return 0;
774         }
775
776         /* We may have converted the inode and made the cache invalid. */
777         spin_lock(&block_group->lock);
778         if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
779                 spin_unlock(&block_group->lock);
780                 goto out;
781         }
782         spin_unlock(&block_group->lock);
783
784         ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
785                                       path, block_group->key.objectid);
786         btrfs_free_path(path);
787         if (ret <= 0)
788                 goto out;
789
790         spin_lock(&ctl->tree_lock);
791         matched = (ctl->free_space == (block_group->key.offset - used -
792                                        block_group->bytes_super));
793         spin_unlock(&ctl->tree_lock);
794
795         if (!matched) {
796                 __btrfs_remove_free_space_cache(ctl);
797                 printk(KERN_ERR "block group %llu has an wrong amount of free "
798                        "space\n", block_group->key.objectid);
799                 ret = -1;
800         }
801 out:
802         if (ret < 0) {
803                 /* This cache is bogus, make sure it gets cleared */
804                 spin_lock(&block_group->lock);
805                 block_group->disk_cache_state = BTRFS_DC_CLEAR;
806                 spin_unlock(&block_group->lock);
807                 ret = 0;
808
809                 printk(KERN_ERR "btrfs: failed to load free space cache "
810                        "for block group %llu\n", block_group->key.objectid);
811         }
812
813         iput(inode);
814         return ret;
815 }
816
817 /**
818  * __btrfs_write_out_cache - write out cached info to an inode
819  * @root - the root the inode belongs to
820  * @ctl - the free space cache we are going to write out
821  * @block_group - the block_group for this cache if it belongs to a block_group
822  * @trans - the trans handle
823  * @path - the path to use
824  * @offset - the offset for the key we'll insert
825  *
826  * This function writes out a free space cache struct to disk for quick recovery
827  * on mount.  This will return 0 if it was successfull in writing the cache out,
828  * and -1 if it was not.
829  */
830 int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
831                             struct btrfs_free_space_ctl *ctl,
832                             struct btrfs_block_group_cache *block_group,
833                             struct btrfs_trans_handle *trans,
834                             struct btrfs_path *path, u64 offset)
835 {
836         struct btrfs_free_space_header *header;
837         struct extent_buffer *leaf;
838         struct rb_node *node;
839         struct list_head *pos, *n;
840         struct extent_state *cached_state = NULL;
841         struct btrfs_free_cluster *cluster = NULL;
842         struct extent_io_tree *unpin = NULL;
843         struct io_ctl io_ctl;
844         struct list_head bitmap_list;
845         struct btrfs_key key;
846         u64 start, extent_start, extent_end, len;
847         int entries = 0;
848         int bitmaps = 0;
849         int ret;
850         int err = -1;
851
852         INIT_LIST_HEAD(&bitmap_list);
853
854         if (!i_size_read(inode))
855                 return -1;
856
857         ret = io_ctl_init(&io_ctl, inode, root);
858         if (ret)
859                 return -1;
860
861         /* Get the cluster for this block_group if it exists */
862         if (block_group && !list_empty(&block_group->cluster_list))
863                 cluster = list_entry(block_group->cluster_list.next,
864                                      struct btrfs_free_cluster,
865                                      block_group_list);
866
867         /* Lock all pages first so we can lock the extent safely. */
868         io_ctl_prepare_pages(&io_ctl, inode, 0);
869
870         lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
871                          0, &cached_state, GFP_NOFS);
872
873         node = rb_first(&ctl->free_space_offset);
874         if (!node && cluster) {
875                 node = rb_first(&cluster->root);
876                 cluster = NULL;
877         }
878
879         /* Make sure we can fit our crcs into the first page */
880         if (io_ctl.check_crcs &&
881             (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) {
882                 WARN_ON(1);
883                 goto out_nospc;
884         }
885
886         io_ctl_set_generation(&io_ctl, trans->transid);
887
888         /* Write out the extent entries */
889         while (node) {
890                 struct btrfs_free_space *e;
891
892                 e = rb_entry(node, struct btrfs_free_space, offset_index);
893                 entries++;
894
895                 ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes,
896                                        e->bitmap);
897                 if (ret)
898                         goto out_nospc;
899
900                 if (e->bitmap) {
901                         list_add_tail(&e->list, &bitmap_list);
902                         bitmaps++;
903                 }
904                 node = rb_next(node);
905                 if (!node && cluster) {
906                         node = rb_first(&cluster->root);
907                         cluster = NULL;
908                 }
909         }
910
911         /*
912          * We want to add any pinned extents to our free space cache
913          * so we don't leak the space
914          */
915
916         /*
917          * We shouldn't have switched the pinned extents yet so this is the
918          * right one
919          */
920         unpin = root->fs_info->pinned_extents;
921
922         if (block_group)
923                 start = block_group->key.objectid;
924
925         while (block_group && (start < block_group->key.objectid +
926                                block_group->key.offset)) {
927                 ret = find_first_extent_bit(unpin, start,
928                                             &extent_start, &extent_end,
929                                             EXTENT_DIRTY);
930                 if (ret) {
931                         ret = 0;
932                         break;
933                 }
934
935                 /* This pinned extent is out of our range */
936                 if (extent_start >= block_group->key.objectid +
937                     block_group->key.offset)
938                         break;
939
940                 extent_start = max(extent_start, start);
941                 extent_end = min(block_group->key.objectid +
942                                  block_group->key.offset, extent_end + 1);
943                 len = extent_end - extent_start;
944
945                 entries++;
946                 ret = io_ctl_add_entry(&io_ctl, extent_start, len, NULL);
947                 if (ret)
948                         goto out_nospc;
949
950                 start = extent_end;
951         }
952
953         /* Write out the bitmaps */
954         list_for_each_safe(pos, n, &bitmap_list) {
955                 struct btrfs_free_space *entry =
956                         list_entry(pos, struct btrfs_free_space, list);
957
958                 ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap);
959                 if (ret)
960                         goto out_nospc;
961                 list_del_init(&entry->list);
962         }
963
964         /* Zero out the rest of the pages just to make sure */
965         io_ctl_zero_remaining_pages(&io_ctl);
966
967         ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages,
968                                 0, i_size_read(inode), &cached_state);
969         io_ctl_drop_pages(&io_ctl);
970         unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
971                              i_size_read(inode) - 1, &cached_state, GFP_NOFS);
972
973         if (ret)
974                 goto out;
975
976
977         ret = filemap_write_and_wait(inode->i_mapping);
978         if (ret)
979                 goto out;
980
981         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
982         key.offset = offset;
983         key.type = 0;
984
985         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
986         if (ret < 0) {
987                 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
988                                  EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
989                                  GFP_NOFS);
990                 goto out;
991         }
992         leaf = path->nodes[0];
993         if (ret > 0) {
994                 struct btrfs_key found_key;
995                 BUG_ON(!path->slots[0]);
996                 path->slots[0]--;
997                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
998                 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
999                     found_key.offset != offset) {
1000                         clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
1001                                          inode->i_size - 1,
1002                                          EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
1003                                          NULL, GFP_NOFS);
1004                         btrfs_release_path(path);
1005                         goto out;
1006                 }
1007         }
1008
1009         BTRFS_I(inode)->generation = trans->transid;
1010         header = btrfs_item_ptr(leaf, path->slots[0],
1011                                 struct btrfs_free_space_header);
1012         btrfs_set_free_space_entries(leaf, header, entries);
1013         btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
1014         btrfs_set_free_space_generation(leaf, header, trans->transid);
1015         btrfs_mark_buffer_dirty(leaf);
1016         btrfs_release_path(path);
1017
1018         err = 0;
1019 out:
1020         io_ctl_free(&io_ctl);
1021         if (err) {
1022                 invalidate_inode_pages2(inode->i_mapping);
1023                 BTRFS_I(inode)->generation = 0;
1024         }
1025         btrfs_update_inode(trans, root, inode);
1026         return err;
1027
1028 out_nospc:
1029         list_for_each_safe(pos, n, &bitmap_list) {
1030                 struct btrfs_free_space *entry =
1031                         list_entry(pos, struct btrfs_free_space, list);
1032                 list_del_init(&entry->list);
1033         }
1034         io_ctl_drop_pages(&io_ctl);
1035         unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
1036                              i_size_read(inode) - 1, &cached_state, GFP_NOFS);
1037         goto out;
1038 }
1039
1040 int btrfs_write_out_cache(struct btrfs_root *root,
1041                           struct btrfs_trans_handle *trans,
1042                           struct btrfs_block_group_cache *block_group,
1043                           struct btrfs_path *path)
1044 {
1045         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1046         struct inode *inode;
1047         int ret = 0;
1048
1049         root = root->fs_info->tree_root;
1050
1051         spin_lock(&block_group->lock);
1052         if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
1053                 spin_unlock(&block_group->lock);
1054                 return 0;
1055         }
1056         spin_unlock(&block_group->lock);
1057
1058         inode = lookup_free_space_inode(root, block_group, path);
1059         if (IS_ERR(inode))
1060                 return 0;
1061
1062         ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
1063                                       path, block_group->key.objectid);
1064         if (ret) {
1065                 spin_lock(&block_group->lock);
1066                 block_group->disk_cache_state = BTRFS_DC_ERROR;
1067                 spin_unlock(&block_group->lock);
1068                 ret = 0;
1069 #ifdef DEBUG
1070                 printk(KERN_ERR "btrfs: failed to write free space cace "
1071                        "for block group %llu\n", block_group->key.objectid);
1072 #endif
1073         }
1074
1075         iput(inode);
1076         return ret;
1077 }
1078
1079 static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
1080                                           u64 offset)
1081 {
1082         BUG_ON(offset < bitmap_start);
1083         offset -= bitmap_start;
1084         return (unsigned long)(div_u64(offset, unit));
1085 }
1086
1087 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
1088 {
1089         return (unsigned long)(div_u64(bytes, unit));
1090 }
1091
1092 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
1093                                    u64 offset)
1094 {
1095         u64 bitmap_start;
1096         u64 bytes_per_bitmap;
1097
1098         bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
1099         bitmap_start = offset - ctl->start;
1100         bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
1101         bitmap_start *= bytes_per_bitmap;
1102         bitmap_start += ctl->start;
1103
1104         return bitmap_start;
1105 }
1106
1107 static int tree_insert_offset(struct rb_root *root, u64 offset,
1108                               struct rb_node *node, int bitmap)
1109 {
1110         struct rb_node **p = &root->rb_node;
1111         struct rb_node *parent = NULL;
1112         struct btrfs_free_space *info;
1113
1114         while (*p) {
1115                 parent = *p;
1116                 info = rb_entry(parent, struct btrfs_free_space, offset_index);
1117
1118                 if (offset < info->offset) {
1119                         p = &(*p)->rb_left;
1120                 } else if (offset > info->offset) {
1121                         p = &(*p)->rb_right;
1122                 } else {
1123                         /*
1124                          * we could have a bitmap entry and an extent entry
1125                          * share the same offset.  If this is the case, we want
1126                          * the extent entry to always be found first if we do a
1127                          * linear search through the tree, since we want to have
1128                          * the quickest allocation time, and allocating from an
1129                          * extent is faster than allocating from a bitmap.  So
1130                          * if we're inserting a bitmap and we find an entry at
1131                          * this offset, we want to go right, or after this entry
1132                          * logically.  If we are inserting an extent and we've
1133                          * found a bitmap, we want to go left, or before
1134                          * logically.
1135                          */
1136                         if (bitmap) {
1137                                 if (info->bitmap) {
1138                                         WARN_ON_ONCE(1);
1139                                         return -EEXIST;
1140                                 }
1141                                 p = &(*p)->rb_right;
1142                         } else {
1143                                 if (!info->bitmap) {
1144                                         WARN_ON_ONCE(1);
1145                                         return -EEXIST;
1146                                 }
1147                                 p = &(*p)->rb_left;
1148                         }
1149                 }
1150         }
1151
1152         rb_link_node(node, parent, p);
1153         rb_insert_color(node, root);
1154
1155         return 0;
1156 }
1157
1158 /*
1159  * searches the tree for the given offset.
1160  *
1161  * fuzzy - If this is set, then we are trying to make an allocation, and we just
1162  * want a section that has at least bytes size and comes at or after the given
1163  * offset.
1164  */
1165 static struct btrfs_free_space *
1166 tree_search_offset(struct btrfs_free_space_ctl *ctl,
1167                    u64 offset, int bitmap_only, int fuzzy)
1168 {
1169         struct rb_node *n = ctl->free_space_offset.rb_node;
1170         struct btrfs_free_space *entry, *prev = NULL;
1171
1172         /* find entry that is closest to the 'offset' */
1173         while (1) {
1174                 if (!n) {
1175                         entry = NULL;
1176                         break;
1177                 }
1178
1179                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1180                 prev = entry;
1181
1182                 if (offset < entry->offset)
1183                         n = n->rb_left;
1184                 else if (offset > entry->offset)
1185                         n = n->rb_right;
1186                 else
1187                         break;
1188         }
1189
1190         if (bitmap_only) {
1191                 if (!entry)
1192                         return NULL;
1193                 if (entry->bitmap)
1194                         return entry;
1195
1196                 /*
1197                  * bitmap entry and extent entry may share same offset,
1198                  * in that case, bitmap entry comes after extent entry.
1199                  */
1200                 n = rb_next(n);
1201                 if (!n)
1202                         return NULL;
1203                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1204                 if (entry->offset != offset)
1205                         return NULL;
1206
1207                 WARN_ON(!entry->bitmap);
1208                 return entry;
1209         } else if (entry) {
1210                 if (entry->bitmap) {
1211                         /*
1212                          * if previous extent entry covers the offset,
1213                          * we should return it instead of the bitmap entry
1214                          */
1215                         n = &entry->offset_index;
1216                         while (1) {
1217                                 n = rb_prev(n);
1218                                 if (!n)
1219                                         break;
1220                                 prev = rb_entry(n, struct btrfs_free_space,
1221                                                 offset_index);
1222                                 if (!prev->bitmap) {
1223                                         if (prev->offset + prev->bytes > offset)
1224                                                 entry = prev;
1225                                         break;
1226                                 }
1227                         }
1228                 }
1229                 return entry;
1230         }
1231
1232         if (!prev)
1233                 return NULL;
1234
1235         /* find last entry before the 'offset' */
1236         entry = prev;
1237         if (entry->offset > offset) {
1238                 n = rb_prev(&entry->offset_index);
1239                 if (n) {
1240                         entry = rb_entry(n, struct btrfs_free_space,
1241                                         offset_index);
1242                         BUG_ON(entry->offset > offset);
1243                 } else {
1244                         if (fuzzy)
1245                                 return entry;
1246                         else
1247                                 return NULL;
1248                 }
1249         }
1250
1251         if (entry->bitmap) {
1252                 n = &entry->offset_index;
1253                 while (1) {
1254                         n = rb_prev(n);
1255                         if (!n)
1256                                 break;
1257                         prev = rb_entry(n, struct btrfs_free_space,
1258                                         offset_index);
1259                         if (!prev->bitmap) {
1260                                 if (prev->offset + prev->bytes > offset)
1261                                         return prev;
1262                                 break;
1263                         }
1264                 }
1265                 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
1266                         return entry;
1267         } else if (entry->offset + entry->bytes > offset)
1268                 return entry;
1269
1270         if (!fuzzy)
1271                 return NULL;
1272
1273         while (1) {
1274                 if (entry->bitmap) {
1275                         if (entry->offset + BITS_PER_BITMAP *
1276                             ctl->unit > offset)
1277                                 break;
1278                 } else {
1279                         if (entry->offset + entry->bytes > offset)
1280                                 break;
1281                 }
1282
1283                 n = rb_next(&entry->offset_index);
1284                 if (!n)
1285                         return NULL;
1286                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1287         }
1288         return entry;
1289 }
1290
1291 static inline void
1292 __unlink_free_space(struct btrfs_free_space_ctl *ctl,
1293                     struct btrfs_free_space *info)
1294 {
1295         rb_erase(&info->offset_index, &ctl->free_space_offset);
1296         ctl->free_extents--;
1297 }
1298
1299 static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
1300                               struct btrfs_free_space *info)
1301 {
1302         __unlink_free_space(ctl, info);
1303         ctl->free_space -= info->bytes;
1304 }
1305
1306 static int link_free_space(struct btrfs_free_space_ctl *ctl,
1307                            struct btrfs_free_space *info)
1308 {
1309         int ret = 0;
1310
1311         BUG_ON(!info->bitmap && !info->bytes);
1312         ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
1313                                  &info->offset_index, (info->bitmap != NULL));
1314         if (ret)
1315                 return ret;
1316
1317         ctl->free_space += info->bytes;
1318         ctl->free_extents++;
1319         return ret;
1320 }
1321
1322 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1323 {
1324         struct btrfs_block_group_cache *block_group = ctl->private;
1325         u64 max_bytes;
1326         u64 bitmap_bytes;
1327         u64 extent_bytes;
1328         u64 size = block_group->key.offset;
1329         u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
1330         int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
1331
1332         BUG_ON(ctl->total_bitmaps > max_bitmaps);
1333
1334         /*
1335          * The goal is to keep the total amount of memory used per 1gb of space
1336          * at or below 32k, so we need to adjust how much memory we allow to be
1337          * used by extent based free space tracking
1338          */
1339         if (size < 1024 * 1024 * 1024)
1340                 max_bytes = MAX_CACHE_BYTES_PER_GIG;
1341         else
1342                 max_bytes = MAX_CACHE_BYTES_PER_GIG *
1343                         div64_u64(size, 1024 * 1024 * 1024);
1344
1345         /*
1346          * we want to account for 1 more bitmap than what we have so we can make
1347          * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
1348          * we add more bitmaps.
1349          */
1350         bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;
1351
1352         if (bitmap_bytes >= max_bytes) {
1353                 ctl->extents_thresh = 0;
1354                 return;
1355         }
1356
1357         /*
1358          * we want the extent entry threshold to always be at most 1/2 the maxw
1359          * bytes we can have, or whatever is less than that.
1360          */
1361         extent_bytes = max_bytes - bitmap_bytes;
1362         extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
1363
1364         ctl->extents_thresh =
1365                 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
1366 }
1367
1368 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1369                                        struct btrfs_free_space *info,
1370                                        u64 offset, u64 bytes)
1371 {
1372         unsigned long start, count;
1373
1374         start = offset_to_bit(info->offset, ctl->unit, offset);
1375         count = bytes_to_bits(bytes, ctl->unit);
1376         BUG_ON(start + count > BITS_PER_BITMAP);
1377
1378         bitmap_clear(info->bitmap, start, count);
1379
1380         info->bytes -= bytes;
1381 }
1382
1383 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1384                               struct btrfs_free_space *info, u64 offset,
1385                               u64 bytes)
1386 {
1387         __bitmap_clear_bits(ctl, info, offset, bytes);
1388         ctl->free_space -= bytes;
1389 }
1390
1391 static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
1392                             struct btrfs_free_space *info, u64 offset,
1393                             u64 bytes)
1394 {
1395         unsigned long start, count;
1396
1397         start = offset_to_bit(info->offset, ctl->unit, offset);
1398         count = bytes_to_bits(bytes, ctl->unit);
1399         BUG_ON(start + count > BITS_PER_BITMAP);
1400
1401         bitmap_set(info->bitmap, start, count);
1402
1403         info->bytes += bytes;
1404         ctl->free_space += bytes;
1405 }
1406
1407 static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1408                          struct btrfs_free_space *bitmap_info, u64 *offset,
1409                          u64 *bytes)
1410 {
1411         unsigned long found_bits = 0;
1412         unsigned long bits, i;
1413         unsigned long next_zero;
1414
1415         i = offset_to_bit(bitmap_info->offset, ctl->unit,
1416                           max_t(u64, *offset, bitmap_info->offset));
1417         bits = bytes_to_bits(*bytes, ctl->unit);
1418
1419         for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
1420              i < BITS_PER_BITMAP;
1421              i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
1422                 next_zero = find_next_zero_bit(bitmap_info->bitmap,
1423                                                BITS_PER_BITMAP, i);
1424                 if ((next_zero - i) >= bits) {
1425                         found_bits = next_zero - i;
1426                         break;
1427                 }
1428                 i = next_zero;
1429         }
1430
1431         if (found_bits) {
1432                 *offset = (u64)(i * ctl->unit) + bitmap_info->offset;
1433                 *bytes = (u64)(found_bits) * ctl->unit;
1434                 return 0;
1435         }
1436
1437         return -1;
1438 }
1439
1440 static struct btrfs_free_space *
1441 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
1442 {
1443         struct btrfs_free_space *entry;
1444         struct rb_node *node;
1445         int ret;
1446
1447         if (!ctl->free_space_offset.rb_node)
1448                 return NULL;
1449
1450         entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
1451         if (!entry)
1452                 return NULL;
1453
1454         for (node = &entry->offset_index; node; node = rb_next(node)) {
1455                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1456                 if (entry->bytes < *bytes)
1457                         continue;
1458
1459                 if (entry->bitmap) {
1460                         ret = search_bitmap(ctl, entry, offset, bytes);
1461                         if (!ret)
1462                                 return entry;
1463                         continue;
1464                 }
1465
1466                 *offset = entry->offset;
1467                 *bytes = entry->bytes;
1468                 return entry;
1469         }
1470
1471         return NULL;
1472 }
1473
1474 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
1475                            struct btrfs_free_space *info, u64 offset)
1476 {
1477         info->offset = offset_to_bitmap(ctl, offset);
1478         info->bytes = 0;
1479         INIT_LIST_HEAD(&info->list);
1480         link_free_space(ctl, info);
1481         ctl->total_bitmaps++;
1482
1483         ctl->op->recalc_thresholds(ctl);
1484 }
1485
1486 static void free_bitmap(struct btrfs_free_space_ctl *ctl,
1487                         struct btrfs_free_space *bitmap_info)
1488 {
1489         unlink_free_space(ctl, bitmap_info);
1490         kfree(bitmap_info->bitmap);
1491         kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
1492         ctl->total_bitmaps--;
1493         ctl->op->recalc_thresholds(ctl);
1494 }
1495
1496 static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
1497                               struct btrfs_free_space *bitmap_info,
1498                               u64 *offset, u64 *bytes)
1499 {
1500         u64 end;
1501         u64 search_start, search_bytes;
1502         int ret;
1503
1504 again:
1505         end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
1506
1507         /*
1508          * XXX - this can go away after a few releases.
1509          *
1510          * since the only user of btrfs_remove_free_space is the tree logging
1511          * stuff, and the only way to test that is under crash conditions, we
1512          * want to have this debug stuff here just in case somethings not
1513          * working.  Search the bitmap for the space we are trying to use to
1514          * make sure its actually there.  If its not there then we need to stop
1515          * because something has gone wrong.
1516          */
1517         search_start = *offset;
1518         search_bytes = *bytes;
1519         search_bytes = min(search_bytes, end - search_start + 1);
1520         ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
1521         BUG_ON(ret < 0 || search_start != *offset);
1522
1523         if (*offset > bitmap_info->offset && *offset + *bytes > end) {
1524                 bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
1525                 *bytes -= end - *offset + 1;
1526                 *offset = end + 1;
1527         } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
1528                 bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
1529                 *bytes = 0;
1530         }
1531
1532         if (*bytes) {
1533                 struct rb_node *next = rb_next(&bitmap_info->offset_index);
1534                 if (!bitmap_info->bytes)
1535                         free_bitmap(ctl, bitmap_info);
1536
1537                 /*
1538                  * no entry after this bitmap, but we still have bytes to
1539                  * remove, so something has gone wrong.
1540                  */
1541                 if (!next)
1542                         return -EINVAL;
1543
1544                 bitmap_info = rb_entry(next, struct btrfs_free_space,
1545                                        offset_index);
1546
1547                 /*
1548                  * if the next entry isn't a bitmap we need to return to let the
1549                  * extent stuff do its work.
1550                  */
1551                 if (!bitmap_info->bitmap)
1552                         return -EAGAIN;
1553
1554                 /*
1555                  * Ok the next item is a bitmap, but it may not actually hold
1556                  * the information for the rest of this free space stuff, so
1557                  * look for it, and if we don't find it return so we can try
1558                  * everything over again.
1559                  */
1560                 search_start = *offset;
1561                 search_bytes = *bytes;
1562                 ret = search_bitmap(ctl, bitmap_info, &search_start,
1563                                     &search_bytes);
1564                 if (ret < 0 || search_start != *offset)
1565                         return -EAGAIN;
1566
1567                 goto again;
1568         } else if (!bitmap_info->bytes)
1569                 free_bitmap(ctl, bitmap_info);
1570
1571         return 0;
1572 }
1573
1574 static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
1575                                struct btrfs_free_space *info, u64 offset,
1576                                u64 bytes)
1577 {
1578         u64 bytes_to_set = 0;
1579         u64 end;
1580
1581         end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
1582
1583         bytes_to_set = min(end - offset, bytes);
1584
1585         bitmap_set_bits(ctl, info, offset, bytes_to_set);
1586
1587         return bytes_to_set;
1588
1589 }
1590
1591 static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1592                       struct btrfs_free_space *info)
1593 {
1594         struct btrfs_block_group_cache *block_group = ctl->private;
1595
1596         /*
1597          * If we are below the extents threshold then we can add this as an
1598          * extent, and don't have to deal with the bitmap
1599          */
1600         if (ctl->free_extents < ctl->extents_thresh) {
1601                 /*
1602                  * If this block group has some small extents we don't want to
1603                  * use up all of our free slots in the cache with them, we want
1604                  * to reserve them to larger extents, however if we have plent
1605                  * of cache left then go ahead an dadd them, no sense in adding
1606                  * the overhead of a bitmap if we don't have to.
1607                  */
1608                 if (info->bytes <= block_group->sectorsize * 4) {
1609                         if (ctl->free_extents * 2 <= ctl->extents_thresh)
1610                                 return false;
1611                 } else {
1612                         return false;
1613                 }
1614         }
1615
1616         /*
1617          * some block groups are so tiny they can't be enveloped by a bitmap, so
1618          * don't even bother to create a bitmap for this
1619          */
1620         if (BITS_PER_BITMAP * block_group->sectorsize >
1621             block_group->key.offset)
1622                 return false;
1623
1624         return true;
1625 }
1626
1627 static struct btrfs_free_space_op free_space_op = {
1628         .recalc_thresholds      = recalculate_thresholds,
1629         .use_bitmap             = use_bitmap,
1630 };
1631
1632 static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1633                               struct btrfs_free_space *info)
1634 {
1635         struct btrfs_free_space *bitmap_info;
1636         struct btrfs_block_group_cache *block_group = NULL;
1637         int added = 0;
1638         u64 bytes, offset, bytes_added;
1639         int ret;
1640
1641         bytes = info->bytes;
1642         offset = info->offset;
1643
1644         if (!ctl->op->use_bitmap(ctl, info))
1645                 return 0;
1646
1647         if (ctl->op == &free_space_op)
1648                 block_group = ctl->private;
1649 again:
1650         /*
1651          * Since we link bitmaps right into the cluster we need to see if we
1652          * have a cluster here, and if so and it has our bitmap we need to add
1653          * the free space to that bitmap.
1654          */
1655         if (block_group && !list_empty(&block_group->cluster_list)) {
1656                 struct btrfs_free_cluster *cluster;
1657                 struct rb_node *node;
1658                 struct btrfs_free_space *entry;
1659
1660                 cluster = list_entry(block_group->cluster_list.next,
1661                                      struct btrfs_free_cluster,
1662                                      block_group_list);
1663                 spin_lock(&cluster->lock);
1664                 node = rb_first(&cluster->root);
1665                 if (!node) {
1666                         spin_unlock(&cluster->lock);
1667                         goto no_cluster_bitmap;
1668                 }
1669
1670                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1671                 if (!entry->bitmap) {
1672                         spin_unlock(&cluster->lock);
1673                         goto no_cluster_bitmap;
1674                 }
1675
1676                 if (entry->offset == offset_to_bitmap(ctl, offset)) {
1677                         bytes_added = add_bytes_to_bitmap(ctl, entry,
1678                                                           offset, bytes);
1679                         bytes -= bytes_added;
1680                         offset += bytes_added;
1681                 }
1682                 spin_unlock(&cluster->lock);
1683                 if (!bytes) {
1684                         ret = 1;
1685                         goto out;
1686                 }
1687         }
1688
1689 no_cluster_bitmap:
1690         bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1691                                          1, 0);
1692         if (!bitmap_info) {
1693                 BUG_ON(added);
1694                 goto new_bitmap;
1695         }
1696
1697         bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
1698         bytes -= bytes_added;
1699         offset += bytes_added;
1700         added = 0;
1701
1702         if (!bytes) {
1703                 ret = 1;
1704                 goto out;
1705         } else
1706                 goto again;
1707
1708 new_bitmap:
1709         if (info && info->bitmap) {
1710                 add_new_bitmap(ctl, info, offset);
1711                 added = 1;
1712                 info = NULL;
1713                 goto again;
1714         } else {
1715                 spin_unlock(&ctl->tree_lock);
1716
1717                 /* no pre-allocated info, allocate a new one */
1718                 if (!info) {
1719                         info = kmem_cache_zalloc(btrfs_free_space_cachep,
1720                                                  GFP_NOFS);
1721                         if (!info) {
1722                                 spin_lock(&ctl->tree_lock);
1723                                 ret = -ENOMEM;
1724                                 goto out;
1725                         }
1726                 }
1727
1728                 /* allocate the bitmap */
1729                 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
1730                 spin_lock(&ctl->tree_lock);
1731                 if (!info->bitmap) {
1732                         ret = -ENOMEM;
1733                         goto out;
1734                 }
1735                 goto again;
1736         }
1737
1738 out:
1739         if (info) {
1740                 if (info->bitmap)
1741                         kfree(info->bitmap);
1742                 kmem_cache_free(btrfs_free_space_cachep, info);
1743         }
1744
1745         return ret;
1746 }
1747
1748 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
1749                           struct btrfs_free_space *info, bool update_stat)
1750 {
1751         struct btrfs_free_space *left_info;
1752         struct btrfs_free_space *right_info;
1753         bool merged = false;
1754         u64 offset = info->offset;
1755         u64 bytes = info->bytes;
1756
1757         /*
1758          * first we want to see if there is free space adjacent to the range we
1759          * are adding, if there is remove that struct and add a new one to
1760          * cover the entire range
1761          */
1762         right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
1763         if (right_info && rb_prev(&right_info->offset_index))
1764                 left_info = rb_entry(rb_prev(&right_info->offset_index),
1765                                      struct btrfs_free_space, offset_index);
1766         else
1767                 left_info = tree_search_offset(ctl, offset - 1, 0, 0);
1768
1769         if (right_info && !right_info->bitmap) {
1770                 if (update_stat)
1771                         unlink_free_space(ctl, right_info);
1772                 else
1773                         __unlink_free_space(ctl, right_info);
1774                 info->bytes += right_info->bytes;
1775                 kmem_cache_free(btrfs_free_space_cachep, right_info);
1776                 merged = true;
1777         }
1778
1779         if (left_info && !left_info->bitmap &&
1780             left_info->offset + left_info->bytes == offset) {
1781                 if (update_stat)
1782                         unlink_free_space(ctl, left_info);
1783                 else
1784                         __unlink_free_space(ctl, left_info);
1785                 info->offset = left_info->offset;
1786                 info->bytes += left_info->bytes;
1787                 kmem_cache_free(btrfs_free_space_cachep, left_info);
1788                 merged = true;
1789         }
1790
1791         return merged;
1792 }
1793
1794 int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
1795                            u64 offset, u64 bytes)
1796 {
1797         struct btrfs_free_space *info;
1798         int ret = 0;
1799
1800         info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
1801         if (!info)
1802                 return -ENOMEM;
1803
1804         info->offset = offset;
1805         info->bytes = bytes;
1806
1807         spin_lock(&ctl->tree_lock);
1808
1809         if (try_merge_free_space(ctl, info, true))
1810                 goto link;
1811
1812         /*
1813          * There was no extent directly to the left or right of this new
1814          * extent then we know we're going to have to allocate a new extent, so
1815          * before we do that see if we need to drop this into a bitmap
1816          */
1817         ret = insert_into_bitmap(ctl, info);
1818         if (ret < 0) {
1819                 goto out;
1820         } else if (ret) {
1821                 ret = 0;
1822                 goto out;
1823         }
1824 link:
1825         ret = link_free_space(ctl, info);
1826         if (ret)
1827                 kmem_cache_free(btrfs_free_space_cachep, info);
1828 out:
1829         spin_unlock(&ctl->tree_lock);
1830
1831         if (ret) {
1832                 printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
1833                 BUG_ON(ret == -EEXIST);
1834         }
1835
1836         return ret;
1837 }
1838
1839 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
1840                             u64 offset, u64 bytes)
1841 {
1842         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1843         struct btrfs_free_space *info;
1844         struct btrfs_free_space *next_info = NULL;
1845         int ret = 0;
1846
1847         spin_lock(&ctl->tree_lock);
1848
1849 again:
1850         info = tree_search_offset(ctl, offset, 0, 0);
1851         if (!info) {
1852                 /*
1853                  * oops didn't find an extent that matched the space we wanted
1854                  * to remove, look for a bitmap instead
1855                  */
1856                 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1857                                           1, 0);
1858                 if (!info) {
1859                         /* the tree logging code might be calling us before we
1860                          * have fully loaded the free space rbtree for this
1861                          * block group.  So it is possible the entry won't
1862                          * be in the rbtree yet at all.  The caching code
1863                          * will make sure not to put it in the rbtree if
1864                          * the logging code has pinned it.
1865                          */
1866                         goto out_lock;
1867                 }
1868         }
1869
1870         if (info->bytes < bytes && rb_next(&info->offset_index)) {
1871                 u64 end;
1872                 next_info = rb_entry(rb_next(&info->offset_index),
1873                                              struct btrfs_free_space,
1874                                              offset_index);
1875
1876                 if (next_info->bitmap)
1877                         end = next_info->offset +
1878                               BITS_PER_BITMAP * ctl->unit - 1;
1879                 else
1880                         end = next_info->offset + next_info->bytes;
1881
1882                 if (next_info->bytes < bytes ||
1883                     next_info->offset > offset || offset > end) {
1884                         printk(KERN_CRIT "Found free space at %llu, size %llu,"
1885                               " trying to use %llu\n",
1886                               (unsigned long long)info->offset,
1887                               (unsigned long long)info->bytes,
1888                               (unsigned long long)bytes);
1889                         WARN_ON(1);
1890                         ret = -EINVAL;
1891                         goto out_lock;
1892                 }
1893
1894                 info = next_info;
1895         }
1896
1897         if (info->bytes == bytes) {
1898                 unlink_free_space(ctl, info);
1899                 if (info->bitmap) {
1900                         kfree(info->bitmap);
1901                         ctl->total_bitmaps--;
1902                 }
1903                 kmem_cache_free(btrfs_free_space_cachep, info);
1904                 ret = 0;
1905                 goto out_lock;
1906         }
1907
1908         if (!info->bitmap && info->offset == offset) {
1909                 unlink_free_space(ctl, info);
1910                 info->offset += bytes;
1911                 info->bytes -= bytes;
1912                 ret = link_free_space(ctl, info);
1913                 WARN_ON(ret);
1914                 goto out_lock;
1915         }
1916
1917         if (!info->bitmap && info->offset <= offset &&
1918             info->offset + info->bytes >= offset + bytes) {
1919                 u64 old_start = info->offset;
1920                 /*
1921                  * we're freeing space in the middle of the info,
1922                  * this can happen during tree log replay
1923                  *
1924                  * first unlink the old info and then
1925                  * insert it again after the hole we're creating
1926                  */
1927                 unlink_free_space(ctl, info);
1928                 if (offset + bytes < info->offset + info->bytes) {
1929                         u64 old_end = info->offset + info->bytes;
1930
1931                         info->offset = offset + bytes;
1932                         info->bytes = old_end - info->offset;
1933                         ret = link_free_space(ctl, info);
1934                         WARN_ON(ret);
1935                         if (ret)
1936                                 goto out_lock;
1937                 } else {
1938                         /* the hole we're creating ends at the end
1939                          * of the info struct, just free the info
1940                          */
1941                         kmem_cache_free(btrfs_free_space_cachep, info);
1942                 }
1943                 spin_unlock(&ctl->tree_lock);
1944
1945                 /* step two, insert a new info struct to cover
1946                  * anything before the hole
1947                  */
1948                 ret = btrfs_add_free_space(block_group, old_start,
1949                                            offset - old_start);
1950                 WARN_ON(ret);
1951                 goto out;
1952         }
1953
1954         ret = remove_from_bitmap(ctl, info, &offset, &bytes);
1955         if (ret == -EAGAIN)
1956                 goto again;
1957         BUG_ON(ret);
1958 out_lock:
1959         spin_unlock(&ctl->tree_lock);
1960 out:
1961         return ret;
1962 }
1963
1964 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1965                            u64 bytes)
1966 {
1967         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1968         struct btrfs_free_space *info;
1969         struct rb_node *n;
1970         int count = 0;
1971
1972         for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
1973                 info = rb_entry(n, struct btrfs_free_space, offset_index);
1974                 if (info->bytes >= bytes)
1975                         count++;
1976                 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
1977                        (unsigned long long)info->offset,
1978                        (unsigned long long)info->bytes,
1979                        (info->bitmap) ? "yes" : "no");
1980         }
1981         printk(KERN_INFO "block group has cluster?: %s\n",
1982                list_empty(&block_group->cluster_list) ? "no" : "yes");
1983         printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
1984                "\n", count);
1985 }
1986
1987 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
1988 {
1989         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1990
1991         spin_lock_init(&ctl->tree_lock);
1992         ctl->unit = block_group->sectorsize;
1993         ctl->start = block_group->key.objectid;
1994         ctl->private = block_group;
1995         ctl->op = &free_space_op;
1996
1997         /*
1998          * we only want to have 32k of ram per block group for keeping
1999          * track of free space, and if we pass 1/2 of that we want to
2000          * start converting things over to using bitmaps
2001          */
2002         ctl->extents_thresh = ((1024 * 32) / 2) /
2003                                 sizeof(struct btrfs_free_space);
2004 }
2005
2006 /*
2007  * for a given cluster, put all of its extents back into the free
2008  * space cache.  If the block group passed doesn't match the block group
2009  * pointed to by the cluster, someone else raced in and freed the
2010  * cluster already.  In that case, we just return without changing anything
2011  */
2012 static int
2013 __btrfs_return_cluster_to_free_space(
2014                              struct btrfs_block_group_cache *block_group,
2015                              struct btrfs_free_cluster *cluster)
2016 {
2017         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2018         struct btrfs_free_space *entry;
2019         struct rb_node *node;
2020
2021         spin_lock(&cluster->lock);
2022         if (cluster->block_group != block_group)
2023                 goto out;
2024
2025         cluster->block_group = NULL;
2026         cluster->window_start = 0;
2027         list_del_init(&cluster->block_group_list);
2028
2029         node = rb_first(&cluster->root);
2030         while (node) {
2031                 bool bitmap;
2032
2033                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2034                 node = rb_next(&entry->offset_index);
2035                 rb_erase(&entry->offset_index, &cluster->root);
2036
2037                 bitmap = (entry->bitmap != NULL);
2038                 if (!bitmap)
2039                         try_merge_free_space(ctl, entry, false);
2040                 tree_insert_offset(&ctl->free_space_offset,
2041                                    entry->offset, &entry->offset_index, bitmap);
2042         }
2043         cluster->root = RB_ROOT;
2044
2045 out:
2046         spin_unlock(&cluster->lock);
2047         btrfs_put_block_group(block_group);
2048         return 0;
2049 }
2050
2051 void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
2052 {
2053         struct btrfs_free_space *info;
2054         struct rb_node *node;
2055
2056         while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
2057                 info = rb_entry(node, struct btrfs_free_space, offset_index);
2058                 if (!info->bitmap) {
2059                         unlink_free_space(ctl, info);
2060                         kmem_cache_free(btrfs_free_space_cachep, info);
2061                 } else {
2062                         free_bitmap(ctl, info);
2063                 }
2064                 if (need_resched()) {
2065                         spin_unlock(&ctl->tree_lock);
2066                         cond_resched();
2067                         spin_lock(&ctl->tree_lock);
2068                 }
2069         }
2070 }
2071
2072 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
2073 {
2074         spin_lock(&ctl->tree_lock);
2075         __btrfs_remove_free_space_cache_locked(ctl);
2076         spin_unlock(&ctl->tree_lock);
2077 }
2078
2079 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
2080 {
2081         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2082         struct btrfs_free_cluster *cluster;
2083         struct list_head *head;
2084
2085         spin_lock(&ctl->tree_lock);
2086         while ((head = block_group->cluster_list.next) !=
2087                &block_group->cluster_list) {
2088                 cluster = list_entry(head, struct btrfs_free_cluster,
2089                                      block_group_list);
2090
2091                 WARN_ON(cluster->block_group != block_group);
2092                 __btrfs_return_cluster_to_free_space(block_group, cluster);
2093                 if (need_resched()) {
2094                         spin_unlock(&ctl->tree_lock);
2095                         cond_resched();
2096                         spin_lock(&ctl->tree_lock);
2097                 }
2098         }
2099         __btrfs_remove_free_space_cache_locked(ctl);
2100         spin_unlock(&ctl->tree_lock);
2101
2102 }
2103
2104 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
2105                                u64 offset, u64 bytes, u64 empty_size)
2106 {
2107         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2108         struct btrfs_free_space *entry = NULL;
2109         u64 bytes_search = bytes + empty_size;
2110         u64 ret = 0;
2111
2112         spin_lock(&ctl->tree_lock);
2113         entry = find_free_space(ctl, &offset, &bytes_search);
2114         if (!entry)
2115                 goto out;
2116
2117         ret = offset;
2118         if (entry->bitmap) {
2119                 bitmap_clear_bits(ctl, entry, offset, bytes);
2120                 if (!entry->bytes)
2121                         free_bitmap(ctl, entry);
2122         } else {
2123                 unlink_free_space(ctl, entry);
2124                 entry->offset += bytes;
2125                 entry->bytes -= bytes;
2126                 if (!entry->bytes)
2127                         kmem_cache_free(btrfs_free_space_cachep, entry);
2128                 else
2129                         link_free_space(ctl, entry);
2130         }
2131
2132 out:
2133         spin_unlock(&ctl->tree_lock);
2134
2135         return ret;
2136 }
2137
2138 /*
2139  * given a cluster, put all of its extents back into the free space
2140  * cache.  If a block group is passed, this function will only free
2141  * a cluster that belongs to the passed block group.
2142  *
2143  * Otherwise, it'll get a reference on the block group pointed to by the
2144  * cluster and remove the cluster from it.
2145  */
2146 int btrfs_return_cluster_to_free_space(
2147                                struct btrfs_block_group_cache *block_group,
2148                                struct btrfs_free_cluster *cluster)
2149 {
2150         struct btrfs_free_space_ctl *ctl;
2151         int ret;
2152
2153         /* first, get a safe pointer to the block group */
2154         spin_lock(&cluster->lock);
2155         if (!block_group) {
2156                 block_group = cluster->block_group;
2157                 if (!block_group) {
2158                         spin_unlock(&cluster->lock);
2159                         return 0;
2160                 }
2161         } else if (cluster->block_group != block_group) {
2162                 /* someone else has already freed it don't redo their work */
2163                 spin_unlock(&cluster->lock);
2164                 return 0;
2165         }
2166         atomic_inc(&block_group->count);
2167         spin_unlock(&cluster->lock);
2168
2169         ctl = block_group->free_space_ctl;
2170
2171         /* now return any extents the cluster had on it */
2172         spin_lock(&ctl->tree_lock);
2173         ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
2174         spin_unlock(&ctl->tree_lock);
2175
2176         /* finally drop our ref */
2177         btrfs_put_block_group(block_group);
2178         return ret;
2179 }
2180
2181 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
2182                                    struct btrfs_free_cluster *cluster,
2183                                    struct btrfs_free_space *entry,
2184                                    u64 bytes, u64 min_start)
2185 {
2186         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2187         int err;
2188         u64 search_start = cluster->window_start;
2189         u64 search_bytes = bytes;
2190         u64 ret = 0;
2191
2192         search_start = min_start;
2193         search_bytes = bytes;
2194
2195         err = search_bitmap(ctl, entry, &search_start, &search_bytes);
2196         if (err)
2197                 return 0;
2198
2199         ret = search_start;
2200         __bitmap_clear_bits(ctl, entry, ret, bytes);
2201
2202         return ret;
2203 }
2204
2205 /*
2206  * given a cluster, try to allocate 'bytes' from it, returns 0
2207  * if it couldn't find anything suitably large, or a logical disk offset
2208  * if things worked out
2209  */
2210 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
2211                              struct btrfs_free_cluster *cluster, u64 bytes,
2212                              u64 min_start)
2213 {
2214         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2215         struct btrfs_free_space *entry = NULL;
2216         struct rb_node *node;
2217         u64 ret = 0;
2218
2219         spin_lock(&cluster->lock);
2220         if (bytes > cluster->max_size)
2221                 goto out;
2222
2223         if (cluster->block_group != block_group)
2224                 goto out;
2225
2226         node = rb_first(&cluster->root);
2227         if (!node)
2228                 goto out;
2229
2230         entry = rb_entry(node, struct btrfs_free_space, offset_index);
2231         while(1) {
2232                 if (entry->bytes < bytes ||
2233                     (!entry->bitmap && entry->offset < min_start)) {
2234                         node = rb_next(&entry->offset_index);
2235                         if (!node)
2236                                 break;
2237                         entry = rb_entry(node, struct btrfs_free_space,
2238                                          offset_index);
2239                         continue;
2240                 }
2241
2242                 if (entry->bitmap) {
2243                         ret = btrfs_alloc_from_bitmap(block_group,
2244                                                       cluster, entry, bytes,
2245                                                       min_start);
2246                         if (ret == 0) {
2247                                 node = rb_next(&entry->offset_index);
2248                                 if (!node)
2249                                         break;
2250                                 entry = rb_entry(node, struct btrfs_free_space,
2251                                                  offset_index);
2252                                 continue;
2253                         }
2254                 } else {
2255                         ret = entry->offset;
2256
2257                         entry->offset += bytes;
2258                         entry->bytes -= bytes;
2259                 }
2260
2261                 if (entry->bytes == 0)
2262                         rb_erase(&entry->offset_index, &cluster->root);
2263                 break;
2264         }
2265 out:
2266         spin_unlock(&cluster->lock);
2267
2268         if (!ret)
2269                 return 0;
2270
2271         spin_lock(&ctl->tree_lock);
2272
2273         ctl->free_space -= bytes;
2274         if (entry->bytes == 0) {
2275                 ctl->free_extents--;
2276                 if (entry->bitmap) {
2277                         kfree(entry->bitmap);
2278                         ctl->total_bitmaps--;
2279                         ctl->op->recalc_thresholds(ctl);
2280                 }
2281                 kmem_cache_free(btrfs_free_space_cachep, entry);
2282         }
2283
2284         spin_unlock(&ctl->tree_lock);
2285
2286         return ret;
2287 }
2288
2289 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
2290                                 struct btrfs_free_space *entry,
2291                                 struct btrfs_free_cluster *cluster,
2292                                 u64 offset, u64 bytes,
2293                                 u64 cont1_bytes, u64 min_bytes)
2294 {
2295         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2296         unsigned long next_zero;
2297         unsigned long i;
2298         unsigned long want_bits;
2299         unsigned long min_bits;
2300         unsigned long found_bits;
2301         unsigned long start = 0;
2302         unsigned long total_found = 0;
2303         int ret;
2304
2305         i = offset_to_bit(entry->offset, block_group->sectorsize,
2306                           max_t(u64, offset, entry->offset));
2307         want_bits = bytes_to_bits(bytes, block_group->sectorsize);
2308         min_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
2309
2310 again:
2311         found_bits = 0;
2312         for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
2313              i < BITS_PER_BITMAP;
2314              i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
2315                 next_zero = find_next_zero_bit(entry->bitmap,
2316                                                BITS_PER_BITMAP, i);
2317                 if (next_zero - i >= min_bits) {
2318                         found_bits = next_zero - i;
2319                         break;
2320                 }
2321                 i = next_zero;
2322         }
2323
2324         if (!found_bits)
2325                 return -ENOSPC;
2326
2327         if (!total_found) {
2328                 start = i;
2329                 cluster->max_size = 0;
2330         }
2331
2332         total_found += found_bits;
2333
2334         if (cluster->max_size < found_bits * block_group->sectorsize)
2335                 cluster->max_size = found_bits * block_group->sectorsize;
2336
2337         if (total_found < want_bits || cluster->max_size < cont1_bytes) {
2338                 i = next_zero + 1;
2339                 goto again;
2340         }
2341
2342         cluster->window_start = start * block_group->sectorsize +
2343                 entry->offset;
2344         rb_erase(&entry->offset_index, &ctl->free_space_offset);
2345         ret = tree_insert_offset(&cluster->root, entry->offset,
2346                                  &entry->offset_index, 1);
2347         BUG_ON(ret);
2348
2349         return 0;
2350 }
2351
2352 /*
2353  * This searches the block group for just extents to fill the cluster with.
2354  * Try to find a cluster with at least bytes total bytes, at least one
2355  * extent of cont1_bytes, and other clusters of at least min_bytes.
2356  */
2357 static noinline int
2358 setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2359                         struct btrfs_free_cluster *cluster,
2360                         struct list_head *bitmaps, u64 offset, u64 bytes,
2361                         u64 cont1_bytes, u64 min_bytes)
2362 {
2363         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2364         struct btrfs_free_space *first = NULL;
2365         struct btrfs_free_space *entry = NULL;
2366         struct btrfs_free_space *last;
2367         struct rb_node *node;
2368         u64 window_start;
2369         u64 window_free;
2370         u64 max_extent;
2371
2372         entry = tree_search_offset(ctl, offset, 0, 1);
2373         if (!entry)
2374                 return -ENOSPC;
2375
2376         /*
2377          * We don't want bitmaps, so just move along until we find a normal
2378          * extent entry.
2379          */
2380         while (entry->bitmap || entry->bytes < min_bytes) {
2381                 if (entry->bitmap && list_empty(&entry->list))
2382                         list_add_tail(&entry->list, bitmaps);
2383                 node = rb_next(&entry->offset_index);
2384                 if (!node)
2385                         return -ENOSPC;
2386                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2387         }
2388
2389         window_start = entry->offset;
2390         window_free = entry->bytes;
2391         max_extent = entry->bytes;
2392         first = entry;
2393         last = entry;
2394
2395         for (node = rb_next(&entry->offset_index); node;
2396              node = rb_next(&entry->offset_index)) {
2397                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2398
2399                 if (entry->bitmap) {
2400                         if (list_empty(&entry->list))
2401                                 list_add_tail(&entry->list, bitmaps);
2402                         continue;
2403                 }
2404
2405                 if (entry->bytes < min_bytes)
2406                         continue;
2407
2408                 last = entry;
2409                 window_free += entry->bytes;
2410                 if (entry->bytes > max_extent)
2411                         max_extent = entry->bytes;
2412         }
2413
2414         if (window_free < bytes || max_extent < cont1_bytes)
2415                 return -ENOSPC;
2416
2417         cluster->window_start = first->offset;
2418
2419         node = &first->offset_index;
2420
2421         /*
2422          * now we've found our entries, pull them out of the free space
2423          * cache and put them into the cluster rbtree
2424          */
2425         do {
2426                 int ret;
2427
2428                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2429                 node = rb_next(&entry->offset_index);
2430                 if (entry->bitmap || entry->bytes < min_bytes)
2431                         continue;
2432
2433                 rb_erase(&entry->offset_index, &ctl->free_space_offset);
2434                 ret = tree_insert_offset(&cluster->root, entry->offset,
2435                                          &entry->offset_index, 0);
2436                 BUG_ON(ret);
2437         } while (node && entry != last);
2438
2439         cluster->max_size = max_extent;
2440
2441         return 0;
2442 }
2443
2444 /*
2445  * This specifically looks for bitmaps that may work in the cluster, we assume
2446  * that we have already failed to find extents that will work.
2447  */
2448 static noinline int
2449 setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2450                      struct btrfs_free_cluster *cluster,
2451                      struct list_head *bitmaps, u64 offset, u64 bytes,
2452                      u64 cont1_bytes, u64 min_bytes)
2453 {
2454         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2455         struct btrfs_free_space *entry;
2456         int ret = -ENOSPC;
2457         u64 bitmap_offset = offset_to_bitmap(ctl, offset);
2458
2459         if (ctl->total_bitmaps == 0)
2460                 return -ENOSPC;
2461
2462         /*
2463          * The bitmap that covers offset won't be in the list unless offset
2464          * is just its start offset.
2465          */
2466         entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
2467         if (entry->offset != bitmap_offset) {
2468                 entry = tree_search_offset(ctl, bitmap_offset, 1, 0);
2469                 if (entry && list_empty(&entry->list))
2470                         list_add(&entry->list, bitmaps);
2471         }
2472
2473         list_for_each_entry(entry, bitmaps, list) {
2474                 if (entry->bytes < min_bytes)
2475                         continue;
2476                 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2477                                            bytes, cont1_bytes, min_bytes);
2478                 if (!ret)
2479                         return 0;
2480         }
2481
2482         /*
2483          * The bitmaps list has all the bitmaps that record free space
2484          * starting after offset, so no more search is required.
2485          */
2486         return -ENOSPC;
2487 }
2488
2489 /*
2490  * here we try to find a cluster of blocks in a block group.  The goal
2491  * is to find at least bytes+empty_size.
2492  * We might not find them all in one contiguous area.
2493  *
2494  * returns zero and sets up cluster if things worked out, otherwise
2495  * it returns -enospc
2496  */
2497 int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2498                              struct btrfs_root *root,
2499                              struct btrfs_block_group_cache *block_group,
2500                              struct btrfs_free_cluster *cluster,
2501                              u64 offset, u64 bytes, u64 empty_size)
2502 {
2503         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2504         struct btrfs_free_space *entry, *tmp;
2505         LIST_HEAD(bitmaps);
2506         u64 min_bytes;
2507         u64 cont1_bytes;
2508         int ret;
2509
2510         /*
2511          * Choose the minimum extent size we'll require for this
2512          * cluster.  For SSD_SPREAD, don't allow any fragmentation.
2513          * For metadata, allow allocates with smaller extents.  For
2514          * data, keep it dense.
2515          */
2516         if (btrfs_test_opt(root, SSD_SPREAD)) {
2517                 cont1_bytes = min_bytes = bytes + empty_size;
2518         } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
2519                 cont1_bytes = bytes;
2520                 min_bytes = block_group->sectorsize;
2521         } else {
2522                 cont1_bytes = max(bytes, (bytes + empty_size) >> 2);
2523                 min_bytes = block_group->sectorsize;
2524         }
2525
2526         spin_lock(&ctl->tree_lock);
2527
2528         /*
2529          * If we know we don't have enough space to make a cluster don't even
2530          * bother doing all the work to try and find one.
2531          */
2532         if (ctl->free_space < bytes) {
2533                 spin_unlock(&ctl->tree_lock);
2534                 return -ENOSPC;
2535         }
2536
2537         spin_lock(&cluster->lock);
2538
2539         /* someone already found a cluster, hooray */
2540         if (cluster->block_group) {
2541                 ret = 0;
2542                 goto out;
2543         }
2544
2545         ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
2546                                       bytes + empty_size,
2547                                       cont1_bytes, min_bytes);
2548         if (ret)
2549                 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
2550                                            offset, bytes + empty_size,
2551                                            cont1_bytes, min_bytes);
2552
2553         /* Clear our temporary list */
2554         list_for_each_entry_safe(entry, tmp, &bitmaps, list)
2555                 list_del_init(&entry->list);
2556
2557         if (!ret) {
2558                 atomic_inc(&block_group->count);
2559                 list_add_tail(&cluster->block_group_list,
2560                               &block_group->cluster_list);
2561                 cluster->block_group = block_group;
2562         }
2563 out:
2564         spin_unlock(&cluster->lock);
2565         spin_unlock(&ctl->tree_lock);
2566
2567         return ret;
2568 }
2569
2570 /*
2571  * simple code to zero out a cluster
2572  */
2573 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
2574 {
2575         spin_lock_init(&cluster->lock);
2576         spin_lock_init(&cluster->refill_lock);
2577         cluster->root = RB_ROOT;
2578         cluster->max_size = 0;
2579         INIT_LIST_HEAD(&cluster->block_group_list);
2580         cluster->block_group = NULL;
2581 }
2582
2583 static int do_trimming(struct btrfs_block_group_cache *block_group,
2584                        u64 *total_trimmed, u64 start, u64 bytes,
2585                        u64 reserved_start, u64 reserved_bytes)
2586 {
2587         struct btrfs_space_info *space_info = block_group->space_info;
2588         struct btrfs_fs_info *fs_info = block_group->fs_info;
2589         int ret;
2590         int update = 0;
2591         u64 trimmed = 0;
2592
2593         spin_lock(&space_info->lock);
2594         spin_lock(&block_group->lock);
2595         if (!block_group->ro) {
2596                 block_group->reserved += reserved_bytes;
2597                 space_info->bytes_reserved += reserved_bytes;
2598                 update = 1;
2599         }
2600         spin_unlock(&block_group->lock);
2601         spin_unlock(&space_info->lock);
2602
2603         ret = btrfs_error_discard_extent(fs_info->extent_root,
2604                                          start, bytes, &trimmed);
2605         if (!ret)
2606                 *total_trimmed += trimmed;
2607
2608         btrfs_add_free_space(block_group, reserved_start, reserved_bytes);
2609
2610         if (update) {
2611                 spin_lock(&space_info->lock);
2612                 spin_lock(&block_group->lock);
2613                 if (block_group->ro)
2614                         space_info->bytes_readonly += reserved_bytes;
2615                 block_group->reserved -= reserved_bytes;
2616                 space_info->bytes_reserved -= reserved_bytes;
2617                 spin_unlock(&space_info->lock);
2618                 spin_unlock(&block_group->lock);
2619         }
2620
2621         return ret;
2622 }
2623
2624 static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
2625                           u64 *total_trimmed, u64 start, u64 end, u64 minlen)
2626 {
2627         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2628         struct btrfs_free_space *entry;
2629         struct rb_node *node;
2630         int ret = 0;
2631         u64 extent_start;
2632         u64 extent_bytes;
2633         u64 bytes;
2634
2635         while (start < end) {
2636                 spin_lock(&ctl->tree_lock);
2637
2638                 if (ctl->free_space < minlen) {
2639                         spin_unlock(&ctl->tree_lock);
2640                         break;
2641                 }
2642
2643                 entry = tree_search_offset(ctl, start, 0, 1);
2644                 if (!entry) {
2645                         spin_unlock(&ctl->tree_lock);
2646                         break;
2647                 }
2648
2649                 /* skip bitmaps */
2650                 while (entry->bitmap) {
2651                         node = rb_next(&entry->offset_index);
2652                         if (!node) {
2653                                 spin_unlock(&ctl->tree_lock);
2654                                 goto out;
2655                         }
2656                         entry = rb_entry(node, struct btrfs_free_space,
2657                                          offset_index);
2658                 }
2659
2660                 if (entry->offset >= end) {
2661                         spin_unlock(&ctl->tree_lock);
2662                         break;
2663                 }
2664
2665                 extent_start = entry->offset;
2666                 extent_bytes = entry->bytes;
2667                 start = max(start, extent_start);
2668                 bytes = min(extent_start + extent_bytes, end) - start;
2669                 if (bytes < minlen) {
2670                         spin_unlock(&ctl->tree_lock);
2671                         goto next;
2672                 }
2673
2674                 unlink_free_space(ctl, entry);
2675                 kmem_cache_free(btrfs_free_space_cachep, entry);
2676
2677                 spin_unlock(&ctl->tree_lock);
2678
2679                 ret = do_trimming(block_group, total_trimmed, start, bytes,
2680                                   extent_start, extent_bytes);
2681                 if (ret)
2682                         break;
2683 next:
2684                 start += bytes;
2685
2686                 if (fatal_signal_pending(current)) {
2687                         ret = -ERESTARTSYS;
2688                         break;
2689                 }
2690
2691                 cond_resched();
2692         }
2693 out:
2694         return ret;
2695 }
2696
2697 static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
2698                         u64 *total_trimmed, u64 start, u64 end, u64 minlen)
2699 {
2700         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2701         struct btrfs_free_space *entry;
2702         int ret = 0;
2703         int ret2;
2704         u64 bytes;
2705         u64 offset = offset_to_bitmap(ctl, start);
2706
2707         while (offset < end) {
2708                 bool next_bitmap = false;
2709
2710                 spin_lock(&ctl->tree_lock);
2711
2712                 if (ctl->free_space < minlen) {
2713                         spin_unlock(&ctl->tree_lock);
2714                         break;
2715                 }
2716
2717                 entry = tree_search_offset(ctl, offset, 1, 0);
2718                 if (!entry) {
2719                         spin_unlock(&ctl->tree_lock);
2720                         next_bitmap = true;
2721                         goto next;
2722                 }
2723
2724                 bytes = minlen;
2725                 ret2 = search_bitmap(ctl, entry, &start, &bytes);
2726                 if (ret2 || start >= end) {
2727                         spin_unlock(&ctl->tree_lock);
2728                         next_bitmap = true;
2729                         goto next;
2730                 }
2731
2732                 bytes = min(bytes, end - start);
2733                 if (bytes < minlen) {
2734                         spin_unlock(&ctl->tree_lock);
2735                         goto next;
2736                 }
2737
2738                 bitmap_clear_bits(ctl, entry, start, bytes);
2739                 if (entry->bytes == 0)
2740                         free_bitmap(ctl, entry);
2741
2742                 spin_unlock(&ctl->tree_lock);
2743
2744                 ret = do_trimming(block_group, total_trimmed, start, bytes,
2745                                   start, bytes);
2746                 if (ret)
2747                         break;
2748 next:
2749                 if (next_bitmap) {
2750                         offset += BITS_PER_BITMAP * ctl->unit;
2751                 } else {
2752                         start += bytes;
2753                         if (start >= offset + BITS_PER_BITMAP * ctl->unit)
2754                                 offset += BITS_PER_BITMAP * ctl->unit;
2755                 }
2756
2757                 if (fatal_signal_pending(current)) {
2758                         ret = -ERESTARTSYS;
2759                         break;
2760                 }
2761
2762                 cond_resched();
2763         }
2764
2765         return ret;
2766 }
2767
2768 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2769                            u64 *trimmed, u64 start, u64 end, u64 minlen)
2770 {
2771         int ret;
2772
2773         *trimmed = 0;
2774
2775         ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
2776         if (ret)
2777                 return ret;
2778
2779         ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
2780
2781         return ret;
2782 }
2783
2784 /*
2785  * Find the left-most item in the cache tree, and then return the
2786  * smallest inode number in the item.
2787  *
2788  * Note: the returned inode number may not be the smallest one in
2789  * the tree, if the left-most item is a bitmap.
2790  */
2791 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
2792 {
2793         struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
2794         struct btrfs_free_space *entry = NULL;
2795         u64 ino = 0;
2796
2797         spin_lock(&ctl->tree_lock);
2798
2799         if (RB_EMPTY_ROOT(&ctl->free_space_offset))
2800                 goto out;
2801
2802         entry = rb_entry(rb_first(&ctl->free_space_offset),
2803                          struct btrfs_free_space, offset_index);
2804
2805         if (!entry->bitmap) {
2806                 ino = entry->offset;
2807
2808                 unlink_free_space(ctl, entry);
2809                 entry->offset++;
2810                 entry->bytes--;
2811                 if (!entry->bytes)
2812                         kmem_cache_free(btrfs_free_space_cachep, entry);
2813                 else
2814                         link_free_space(ctl, entry);
2815         } else {
2816                 u64 offset = 0;
2817                 u64 count = 1;
2818                 int ret;
2819
2820                 ret = search_bitmap(ctl, entry, &offset, &count);
2821                 BUG_ON(ret);
2822
2823                 ino = offset;
2824                 bitmap_clear_bits(ctl, entry, offset, 1);
2825                 if (entry->bytes == 0)
2826                         free_bitmap(ctl, entry);
2827         }
2828 out:
2829         spin_unlock(&ctl->tree_lock);
2830
2831         return ino;
2832 }
2833
2834 struct inode *lookup_free_ino_inode(struct btrfs_root *root,
2835                                     struct btrfs_path *path)
2836 {
2837         struct inode *inode = NULL;
2838
2839         spin_lock(&root->cache_lock);
2840         if (root->cache_inode)
2841                 inode = igrab(root->cache_inode);
2842         spin_unlock(&root->cache_lock);
2843         if (inode)
2844                 return inode;
2845
2846         inode = __lookup_free_space_inode(root, path, 0);
2847         if (IS_ERR(inode))
2848                 return inode;
2849
2850         spin_lock(&root->cache_lock);
2851         if (!btrfs_fs_closing(root->fs_info))
2852                 root->cache_inode = igrab(inode);
2853         spin_unlock(&root->cache_lock);
2854
2855         return inode;
2856 }
2857
2858 int create_free_ino_inode(struct btrfs_root *root,
2859                           struct btrfs_trans_handle *trans,
2860                           struct btrfs_path *path)
2861 {
2862         return __create_free_space_inode(root, trans, path,
2863                                          BTRFS_FREE_INO_OBJECTID, 0);
2864 }
2865
2866 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2867 {
2868         struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2869         struct btrfs_path *path;
2870         struct inode *inode;
2871         int ret = 0;
2872         u64 root_gen = btrfs_root_generation(&root->root_item);
2873
2874         if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2875                 return 0;
2876
2877         /*
2878          * If we're unmounting then just return, since this does a search on the
2879          * normal root and not the commit root and we could deadlock.
2880          */
2881         if (btrfs_fs_closing(fs_info))
2882                 return 0;
2883
2884         path = btrfs_alloc_path();
2885         if (!path)
2886                 return 0;
2887
2888         inode = lookup_free_ino_inode(root, path);
2889         if (IS_ERR(inode))
2890                 goto out;
2891
2892         if (root_gen != BTRFS_I(inode)->generation)
2893                 goto out_put;
2894
2895         ret = __load_free_space_cache(root, inode, ctl, path, 0);
2896
2897         if (ret < 0)
2898                 printk(KERN_ERR "btrfs: failed to load free ino cache for "
2899                        "root %llu\n", root->root_key.objectid);
2900 out_put:
2901         iput(inode);
2902 out:
2903         btrfs_free_path(path);
2904         return ret;
2905 }
2906
2907 int btrfs_write_out_ino_cache(struct btrfs_root *root,
2908                               struct btrfs_trans_handle *trans,
2909                               struct btrfs_path *path)
2910 {
2911         struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2912         struct inode *inode;
2913         int ret;
2914
2915         if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2916                 return 0;
2917
2918         inode = lookup_free_ino_inode(root, path);
2919         if (IS_ERR(inode))
2920                 return 0;
2921
2922         ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
2923         if (ret) {
2924                 btrfs_delalloc_release_metadata(inode, inode->i_size);
2925 #ifdef DEBUG
2926                 printk(KERN_ERR "btrfs: failed to write free ino cache "
2927                        "for root %llu\n", root->root_key.objectid);
2928 #endif
2929         }
2930
2931         iput(inode);
2932         return ret;
2933 }