[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] Re: [dm-devel] Re: [PATCH 0/8] I/O bandwidth controller and BIO tracking



Hi, Kamezawa-san,

> > Hi everyone,
> > 
> > This is a new release of dm-ioband and bio-cgroup. With this release,
> > the overhead of bio-cgroup is significantly reduced and the accuracy
> > of block I/O tracking is much improved. These patches are for
> > 2.6.28-rc2-mm1.
> > 
> 
> >From my point of view, a way to record bio_cgroup_id to page_cgroup is quite 
> >neat
> and nice.
> 
> My concern is "bio_cgroup_id". It's provided only for bio_cgroup.
> In this summer, I tried to add swap_cgroup_id only for mem+swap controller but
> commenters said "please provide "id and lookup" in cgroup layer, it should be 
> useful."
> And I agree them. (and postponed it ;)

Yup, that sounds really good.

> Could you try "id" in cgroup layer ? How do you think, Paul and others ?

It seems to easy to implement this feature since what I need to do is
move the code from bio-group to there.
Okay, I'll start it if Paul agrees with this approach.

> That's my only concern and if I/O controller people decides to live with
> this bio tracking infrastracture,
> ==
>    page -> page_cgroup -> bio_cgroup_id
> ==
> I have no objections. And enqueue necessary changes to my queue.

It would be nice if you can include the following patch, which just makes
the page_cgroup infrastructure can be compiled in even when the cgroup
memory controller is compiled out.

> Thanks,
> -Kame

Thank you,
Hirokazu Takahashi.

 ---------------------------

This patch makes the page_cgroup framework be able to be used even if
the compile option of the cgroup memory controller is off.
So bio-cgroup can use this framework without the memory controller.

Signed-off-by: Hirokazu Takahashi <taka@xxxxxxxxxxxxx>

diff -dupr linux-2.6.28-rc2.bc0/include/linux/memcontrol.h 
linux-2.6.28-rc2/include/linux/memcontrol.h
--- linux-2.6.28-rc2.bc0/include/linux/memcontrol.h     2008-11-10 
18:31:34.000000000 +0900
+++ linux-2.6.28-rc2/include/linux/memcontrol.h 2008-11-11 13:51:42.000000000 
+0900
@@ -27,6 +27,9 @@ struct mm_struct;
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 
+extern void __init_mem_page_cgroup(struct page_cgroup *pc);
+#define  mem_cgroup_disabled() mem_cgroup_subsys.disabled
+
 extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm,
                                gfp_t gfp_mask);
 /* for swap handling */
@@ -81,6 +84,15 @@ extern long mem_cgroup_calc_reclaim(stru
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 struct mem_cgroup;
 
+static inline void __init_mem_page_cgroup(struct page_cgroup *pc)
+{
+}
+
+static inline int mem_cgroup_disabled(void)
+{
+       return 1;
+}
+
 static inline int mem_cgroup_newpage_charge(struct page *page,
                                        struct mm_struct *mm, gfp_t gfp_mask)
 {
diff -dupr linux-2.6.28-rc2.bc0/include/linux/mmzone.h 
linux-2.6.28-rc2/include/linux/mmzone.h
--- linux-2.6.28-rc2.bc0/include/linux/mmzone.h 2008-11-10 18:50:50.000000000 
+0900
+++ linux-2.6.28-rc2/include/linux/mmzone.h     2008-11-11 13:51:42.000000000 
+0900
@@ -603,7 +603,7 @@ typedef struct pglist_data {
        int nr_zones;
 #ifdef CONFIG_FLAT_NODE_MEM_MAP        /* means !SPARSEMEM */
        struct page *node_mem_map;
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_CGROUP_PAGE
        struct page_cgroup *node_page_cgroup;
 #endif
 #endif
@@ -952,7 +952,7 @@ struct mem_section {
 
        /* See declaration of similar field in struct zone */
        unsigned long *pageblock_flags;
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_CGROUP_PAGE
        /*
         * If !SPARSEMEM, pgdat doesn't have page_cgroup pointer. We use
         * section. (see memcontrol.h/page_cgroup.h about this.)
diff -dupr linux-2.6.28-rc2.bc0/include/linux/page_cgroup.h 
linux-2.6.28-rc2/include/linux/page_cgroup.h
--- linux-2.6.28-rc2.bc0/include/linux/page_cgroup.h    2008-11-10 
19:29:00.000000000 +0900
+++ linux-2.6.28-rc2/include/linux/page_cgroup.h        2008-11-11 
14:46:47.000000000 +0900
@@ -1,7 +1,7 @@
 #ifndef __LINUX_PAGE_CGROUP_H
 #define __LINUX_PAGE_CGROUP_H
 
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_CGROUP_PAGE
 #include <linux/bit_spinlock.h>
 /*
  * Page Cgroup can be considered as an extended mem_map.
@@ -12,9 +12,11 @@
  */
 struct page_cgroup {
        unsigned long flags;
-       struct mem_cgroup *mem_cgroup;
        struct page *page;
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+       struct mem_cgroup *mem_cgroup;
        struct list_head lru;           /* per cgroup LRU list */
+#endif
 };
 
 void __init pgdat_page_cgroup_init(struct pglist_data *pgdat);
@@ -88,7 +90,7 @@ static inline void unlock_page_cgroup(st
        bit_spin_unlock(PCG_LOCK, &pc->flags);
 }
 
-#else /* CONFIG_CGROUP_MEM_RES_CTLR */
+#else /* CONFIG_CGROUP_PAGE */
 struct page_cgroup;
 
 static inline void pgdat_page_cgroup_init(struct pglist_data *pgdat)
diff -dupr linux-2.6.28-rc2.bc0/init/Kconfig linux-2.6.28-rc2/init/Kconfig
--- linux-2.6.28-rc2.bc0/init/Kconfig   2008-11-10 18:31:34.000000000 +0900
+++ linux-2.6.28-rc2/init/Kconfig       2008-11-11 14:46:47.000000000 +0900
@@ -425,6 +425,10 @@ config CGROUP_MEM_RES_CTLR
          This config option also selects MM_OWNER config option, which
          could in turn add some fork/exit overhead.
 
+config CGROUP_PAGE
+       def_bool y
+       depends on CGROUP_MEM_RES_CTLR
+
 config MM_OWNER
        bool
 
diff -dupr linux-2.6.28-rc2.bc0/mm/Makefile linux-2.6.28-rc2/mm/Makefile
--- linux-2.6.28-rc2.bc0/mm/Makefile    2008-11-10 18:31:34.000000000 +0900
+++ linux-2.6.28-rc2/mm/Makefile        2008-11-11 14:46:47.000000000 +0900
@@ -34,5 +34,6 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o
 obj-$(CONFIG_MIGRATION) += migrate.o
 obj-$(CONFIG_SMP) += allocpercpu.o cpu_alloc.o
 obj-$(CONFIG_QUICKLIST) += quicklist.o
-obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
+obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o
+obj-$(CONFIG_CGROUP_PAGE) += page_cgroup.o
 obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
diff -dupr linux-2.6.28-rc2.bc0/mm/memcontrol.c linux-2.6.28-rc2/mm/memcontrol.c
--- linux-2.6.28-rc2.bc0/mm/memcontrol.c        2008-11-10 18:31:34.000000000 
+0900
+++ linux-2.6.28-rc2/mm/memcontrol.c    2008-11-11 14:48:17.000000000 +0900
@@ -157,6 +157,11 @@ pcg_default_flags[NR_CHARGE_TYPE] = {
        0, /* FORCE */
 };
 
+void __meminit __init_mem_page_cgroup(struct page_cgroup *pc)
+{
+       pc->mem_cgroup = NULL;
+}
+
 /*
  * Always modified under lru lock. Then, not necessary to preempt_disable()
  */
diff -dupr linux-2.6.28-rc2.bc0/mm/page_cgroup.c 
linux-2.6.28-rc2/mm/page_cgroup.c
--- linux-2.6.28-rc2.bc0/mm/page_cgroup.c       2008-11-10 18:31:34.000000000 
+0900
+++ linux-2.6.28-rc2/mm/page_cgroup.c   2008-11-11 14:46:47.000000000 +0900
@@ -8,13 +8,14 @@
 #include <linux/memory.h>
 #include <linux/vmalloc.h>
 #include <linux/cgroup.h>
+#include <linux/memcontrol.h>
 
 static void __meminit
 __init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
 {
        pc->flags = 0;
-       pc->mem_cgroup = NULL;
        pc->page = pfn_to_page(pfn);
+       __init_mem_page_cgroup(pc);
 }
 static unsigned long total_usage;
 
@@ -69,7 +70,7 @@ void __init page_cgroup_init(void)
 
        int nid, fail;
 
-       if (mem_cgroup_subsys.disabled)
+       if (mem_cgroup_disabled())
                return;
 
        for_each_online_node(nid)  {
@@ -229,7 +230,7 @@ void __init page_cgroup_init(void)
        unsigned long pfn;
        int fail = 0;
 
-       if (mem_cgroup_subsys.disabled)
+       if (mem_cgroup_disabled())
                return;
 
        for (pfn = 0; !fail && pfn < max_pfn; pfn += PAGES_PER_SECTION) {


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.