block: adds fingerprinting support to the block layer

The fingerprint is formed from a triple of flags:
 * read or write
 * small or large
 * random or sequential

Signed-off-by: Jake Moilanen
---
 block/Kconfig          |    6 +
 block/Makefile         |    3 
 block/fingerprinting.c |  205 +++++++++++++++++++++++++++++++++++++++++++++++++
 block/genhd.c          |   23 +++++
 block/ll_rw_blk.c      |    4 
 include/linux/genhd.h  |    2 
 6 files changed, 243 insertions(+)

Index: linux-rc/block/fingerprinting.c
===================================================================
--- /dev/null
+++ linux-rc/block/fingerprinting.c
@@ -0,0 +1,205 @@
+/*
+ * block/fingerprinting.c
+ *
+ * Jake Moilanen
+ * Copyright (C) 2006 IBM
+ *
+ * I/O Workload Fingerprinting
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as published
+ * by the Free Software Foundation.
+ */
+/* TODOS:
+ * - Abstract so not so IO specific
+ * - Abstract types
+ */
+
+#include
+#include
+#include
+#include
+
+/* seq_file show handler: prints the three classification flags of pt->fp. */
+int fingerprint_state_show(struct seq_file *s, void *unused)
+{
+	phenotype_t *phenotype = (phenotype_t *)s->private;
+	struct fingerprint *print = phenotype->fp;
+
+	/* Whatever is not READ/SEQ/SMALL is reported as WRITE/RAND/LARGE. */
+	if (print->type != FP_TYPE_READ)
+		seq_printf(s, "write\t(%d)\n", FP_TYPE_WRITE);
+	else
+		seq_printf(s, "read\t(%d)\n", FP_TYPE_READ);
+	if (print->pattern != FP_PATTERN_SEQ)
+		seq_printf(s, "random\t(%d)\n", FP_PATTERN_RAND);
+	else
+		seq_printf(s, "sequential\t(%d)\n", FP_PATTERN_SEQ);
+	if (print->size != FP_SIZE_SMALL)
+		seq_printf(s, "large\t(%d)\n", FP_SIZE_LARGE);
+	else
+		seq_printf(s, "small\t(%d)\n", FP_SIZE_SMALL);
+
+	return 0;
+}
+
+/* seq_file show handler: dumps the counters and averages of pt->fp_ss. */
+int fingerprint_snapshot_show(struct seq_file *s, void *unused)
+{
+	phenotype_t *phenotype = (phenotype_t *)s->private;
+	struct fp_snapshot *snap = phenotype->fp_ss;
+
+	seq_printf(s, "read: %ld\n", snap->reads);
+	seq_printf(s, "write: %ld\n", snap->writes);
+	seq_printf(s, "avg_dist: %ld\n", snap->avg_dist);
+	seq_printf(s, "avg_size: %ld\n", snap->avg_size);
+
+	return 0;
+}
+
+
+/* seq_file show handler: prints the eight pt->top_fitness[i][j][k] entries. */
+int fingerprint_top_fitness_show(struct seq_file *s, void *unused)
+{
+	int i, j, k;
+	phenotype_t *pt = (phenotype_t *)s->private;
+
+	for (i = 0; i < 2; i++)
+		for (j = 0; j < 2; j++)
+			for (k = 0; k < 2; k++)
+				seq_printf(s, "top_fitness[%d][%d][%d]: %lld\n",
+					   i, j, k, pt->top_fitness[i][j][k]);
+	return 0;
+}
+
+/* Running mean of the seek distance; assumes address matches up w/ head_pos */
+static void update_avg_dist(struct fp_snapshot *ss, long head_pos)
+{
+	long long tmp_dist;
+	unsigned long total_ops = ss->reads + ss->writes;
+	long dummy;
+
+	/* set it the first time through */
+	if (!ss->head_pos) {
+		ss->head_pos = head_pos;
+		return;
+	}
+	/* widen first so the subtraction cannot wrap where long is 32 bits */
+	tmp_dist = (long long)ss->head_pos - head_pos;
+	if (tmp_dist < 0)
+		tmp_dist = -tmp_dist;
+
+	/* avg += (sample - avg) / total_ops; divll() presumably divides in place */
+	tmp_dist = tmp_dist - ss->avg_dist;
+	divll(&tmp_dist, total_ops, &dummy);
+	ss->avg_dist += tmp_dist;
+	ss->head_pos = head_pos;
+}
+
+/* Fold the size of one request into the running mean ss->avg_size. */
+static void update_avg_size(struct fp_snapshot *ss, unsigned long size)
+{
+	unsigned long total_ops = ss->reads + ss->writes;
+	long long tmp_size;
+	long dummy;
+
+	/* widen first: in unsigned long, "size - avg" wraps when size < avg */
+	tmp_size = (long long)size - ss->avg_size;
+	divll(&tmp_size, total_ops, &dummy);
+	ss->avg_size += tmp_size;
+}
+
+/* Account one bio in its disk's snapshot; called from __make_request(). */
+void update_fp_snapshot(struct bio *bio)
+{
+	struct fp_snapshot *ss = bio->bi_bdev->bd_disk->fp_ss;
+
+	/* update type */
+	if (bio_data_dir(bio) == READ)
+		ss->reads++;
+	else
+		ss->writes++;
+
+	/* update pattern, using the bio's start sector as the position */
+	update_avg_dist(ss, bio->bi_sector);
+
+	/* update size, as reported by bio_sectors() */
+	update_avg_size(ss, bio_sectors(bio));
+}
+
+/*
+ * Use this when there's multiple disks, and need to consolidate to a system
+ * wide fingerprint: folds instance into master as an ops-weighted mean.
+ */
+void consolidate_fp_snapshot(struct fp_snapshot *master, struct fp_snapshot *instance)
+{
+	unsigned long total_ops;
+	long dummy;
+	long long total_dist;
+	long long total_size;
+
+	BUG_ON(!master);
+	BUG_ON(!instance);
+
+	/* widen first so avg * ops is a 64-bit product even with a 32-bit long */
+	total_dist = (long long)master->avg_dist * (master->reads + master->writes);
+	total_size = (long long)master->avg_size * (master->reads + master->writes);
+
+	/* update operations */
+	master->reads += instance->reads;
+	master->writes += instance->writes;
+	total_ops = master->reads + master->writes;
+
+	/* update distance */
+	total_dist += (long long)instance->avg_dist * (instance->reads + instance->writes);
+	if (total_ops) {
+		divll(&total_dist, total_ops, &dummy);
+		master->avg_dist = total_dist;
+	} else
+		master->avg_dist = 0;
+
+	/* update size */
+	total_size += (long long)instance->avg_size * (instance->reads + instance->writes);
+	if (total_ops) {
+		divll(&total_size, total_ops, &dummy);
+		master->avg_size = total_size;
+	} else
+		master->avg_size = 0;
+}
+
+/* Clear a whole snapshot to zero. */
+void reset_fp_snapshot(struct fp_snapshot *ss)
+{
+	memset(ss, 0, sizeof(struct fp_snapshot));
+}
+
+/* Clear a whole fingerprint to zero. */
+void reset_fp(struct fingerprint *fp)
+{
+	memset(fp, 0, sizeof(struct fingerprint));
+}
+
+/*
+ * Derive the read/write, random/sequential and small/large flags of fp
+ * from a snapshot, using the FP_CLASS_* thresholds.
+ */
+void calc_fp(struct fingerprint *fp, struct fp_snapshot *fp_ss)
+{
+	/* type */
+	if (fp_ss->reads > (fp_ss->writes * FP_CLASS_READ_WRITE_RATIO))
+		fp->type = FP_TYPE_READ;
+	else
+		fp->type = FP_TYPE_WRITE;
+
+	/* pattern; NOTE(review): 512 stands in for the device block size */
+	if (fp_ss->avg_dist >= (512 * FP_CLASS_PATTERN_RAND))
+		fp->pattern = FP_PATTERN_RAND;
+	else
+		fp->pattern = FP_PATTERN_SEQ;
+
+	/* size */
+	if (fp_ss->avg_size > FP_CLASS_SIZE_LARGE)
+		fp->size = FP_SIZE_LARGE;
+	else
+		fp->size = FP_SIZE_SMALL;
+}
Index: linux-rc/block/ll_rw_blk.c
===================================================================
--- linux-rc.orig/block/ll_rw_blk.c
+++ linux-rc/block/ll_rw_blk.c
@@ -28,6 +28,7 @@
 #include
 #include
 #include
+#include
 
 /*
  * for max sense size
@@ -2858,6 +2859,9 @@ static int __make_request(request_queue_
 	rw = bio_data_dir(bio);
 	sync = bio_sync(bio);
 
+#ifdef CONFIG_FINGERPRINTING
+	update_fp_snapshot(bio);
+#endif
 	/*
 	 * low level
driver can indicate that it wants pages above a
 	 * certain limit bounced to low memory (ie for highmem, or even
Index: linux-rc/include/linux/genhd.h
===================================================================
--- linux-rc.orig/include/linux/genhd.h
+++ linux-rc/include/linux/genhd.h
@@ -60,6 +60,7 @@ struct partition {
 #include
 #include
 #include
+#include
 
 struct partition {
 	unsigned char boot_ind;		/* 0x80 - active */
@@ -128,6 +129,7 @@ struct gendisk {
 #else
 	struct disk_stats dkstats;
 #endif
+	struct fp_snapshot *fp_ss;	/* fingerprint counters, set up in alloc_disk_node() */
 };
 
 /* Structure for sysfs attributes on block devices */
Index: linux-rc/block/genhd.c
===================================================================
--- linux-rc.orig/block/genhd.c
+++ linux-rc/block/genhd.c
@@ -387,6 +387,23 @@ static ssize_t disk_stats_read(struct ge
 		jiffies_to_msecs(disk_stat_read(disk, io_ticks)),
 		jiffies_to_msecs(disk_stat_read(disk, time_in_queue)));
 }
+/* "fp" attribute show handler: prints the disk's snapshot fields in hex. */
+static ssize_t disk_fp_read(struct gendisk * disk, char *page)
+{
+	struct fp_snapshot *ss = disk->fp_ss;
+
+	return sprintf(page, "reads: %llx\n"
+			     "writes: %llx\n"
+			     "head_pos: %llx\n"
+			     "avg_dist: %llx\n"
+			     "avg_size: %llx\n",
+		       (unsigned long long)ss->reads,
+		       (unsigned long long)ss->writes,
+		       (unsigned long long)ss->head_pos,
+		       (unsigned long long)ss->avg_dist,
+		       (unsigned long long)ss->avg_size);
+}
+
 static struct disk_attribute disk_attr_uevent = {
 	.attr = {.name = "uevent", .mode = S_IWUSR },
 	.store = disk_uevent_store
@@ -411,6 +428,10 @@ static struct disk_attribute disk_attr_s
 	.attr = {.name = "stat", .mode = S_IRUGO },
 	.show = disk_stats_read
 };
+static struct disk_attribute disk_attr_fp = {
+	.attr = {.name = "fp", .mode = S_IRUGO },
+	.show = disk_fp_read
+};
 
 static struct attribute * default_attrs[] = {
 	&disk_attr_uevent.attr,
@@ -419,6 +440,7 @@ static struct attribute * default_attrs[
 	&disk_attr_removable.attr,
 	&disk_attr_size.attr,
 	&disk_attr_stat.attr,
+	&disk_attr_fp.attr,
 	NULL,
 };
 
@@ -628,6 +650,22 @@ struct gendisk *alloc_disk_node(int mino
 		kobject_init(&disk->kobj);
 		rand_initialize_disk(disk);
 	}
+
+	/*
+	 * disk can presumably be NULL here (it is only used inside the
+	 * conditional block that ends above), so guard the dereference.
+	 * NOTE(review): nothing in this patch frees fp_ss; the disk release
+	 * path needs a matching kfree().
+	 */
+	if (disk) {
+		disk->fp_ss = kmalloc(sizeof(*disk->fp_ss), GFP_KERNEL);
+		if (!disk->fp_ss) {
+			/* NOTE(review): assumes put_disk() undoes alloc_disk_node() */
+			put_disk(disk);
+			return NULL;
+		}
+		memset(disk->fp_ss, 0, sizeof(*disk->fp_ss));
+	}
+
 	return disk;
 }
 
Index: linux-rc/block/Kconfig
===================================================================
--- linux-rc.orig/block/Kconfig
+++ linux-rc/block/Kconfig
@@ -34,3 +34,9 @@ config LSF
 	  If unsure, say Y.
 
 source block/Kconfig.iosched
+
+config FINGERPRINTING
+	bool "I/O Workload Fingerprinting"
+	help
+	  Say Y here if you want workload data to be classified and
+	  used to tune the I/O schedulers. Otherwise say N.
Index: linux-rc/block/Makefile
===================================================================
--- linux-rc.orig/block/Makefile
+++ linux-rc/block/Makefile
@@ -10,3 +10,6 @@ obj-$(CONFIG_IOSCHED_DEADLINE) += deadli
 obj-$(CONFIG_IOSCHED_CFQ)	+= cfq-iosched.o
 
 obj-$(CONFIG_BLK_DEV_IO_TRACE)	+= blktrace.o
+
+
+obj-$(CONFIG_FINGERPRINTING)	+= fingerprinting.o