patch-2.2.16 linux/drivers/block/ll_rw_blk.c
- Lines: 589
- Date: Wed Jun 7 14:26:42 2000
- Orig file: v2.2.15/linux/drivers/block/ll_rw_blk.c
- Orig date: Wed May 3 17:16:33 2000
diff -urN v2.2.15/linux/drivers/block/ll_rw_blk.c linux/drivers/block/ll_rw_blk.c
@@ -3,6 +3,7 @@
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright (C) 1994, Karl Keyte: Added support for disk statistics
+ * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
*/
/*
@@ -20,6 +21,7 @@
#include <asm/system.h>
#include <asm/io.h>
+#include <asm/uaccess.h>
#include <linux/blk.h>
#include <linux/module.h>
@@ -53,11 +55,11 @@
/*
* used to wait on when there are no free requests
*/
-struct wait_queue * wait_for_request = NULL;
+struct wait_queue * wait_for_request;
/* This specifies how many sectors to read ahead on the disk. */
-int read_ahead[MAX_BLKDEV] = {0, };
+int read_ahead[MAX_BLKDEV];
/* blk_dev_struct is:
* *request_fn
@@ -73,7 +75,7 @@
*
* if (!blk_size[MAJOR]) then no minor size checking is done.
*/
-int * blk_size[MAX_BLKDEV] = { NULL, NULL, };
+int * blk_size[MAX_BLKDEV];
/*
* blksize_size contains the size of all block-devices:
@@ -82,7 +84,7 @@
*
* if (!blksize_size[MAJOR]) then 1024 bytes is assumed.
*/
-int * blksize_size[MAX_BLKDEV] = { NULL, NULL, };
+int * blksize_size[MAX_BLKDEV];
/*
* hardsect_size contains the size of the hardware sector of a device.
@@ -96,22 +98,22 @@
* This is currently set by some scsi devices and read by the msdos fs driver.
* Other uses may appear later.
*/
-int * hardsect_size[MAX_BLKDEV] = { NULL, NULL, };
+int * hardsect_size[MAX_BLKDEV];
/*
* The following tunes the read-ahead algorithm in mm/filemap.c
*/
-int * max_readahead[MAX_BLKDEV] = { NULL, NULL, };
+int * max_readahead[MAX_BLKDEV];
/*
* Max number of sectors per request
*/
-int * max_sectors[MAX_BLKDEV] = { NULL, NULL, };
+int * max_sectors[MAX_BLKDEV];
/*
* Max number of segments per request
*/
-int * max_segments[MAX_BLKDEV] = { NULL, NULL, };
+int * max_segments[MAX_BLKDEV];
static inline int get_max_sectors(kdev_t dev)
{
@@ -142,6 +144,17 @@
return &blk_dev[major].current_request;
}
+static inline int get_request_latency(elevator_t * elevator, int rw)
+{
+ int latency;
+
+ latency = elevator->read_latency;
+ if (rw != READ)
+ latency = elevator->write_latency;
+
+ return latency;
+}
+
/*
* remove the plug and let it rip..
*/
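The helper above just selects which of the two per-queue budgets applies to a request. For reference, the elevator_t it dereferences is declared in include/linux/blkdev.h; the sketch below restates only the fields this patch actually touches, so treat the exact layout as an assumption:

    /* Sketch of the per-queue elevator state used by this patch.  The
     * authoritative declaration (and the ELEVATOR_DEFAULTS initializer
     * used by blk_dev_init() further down) is in include/linux/blkdev.h;
     * the field names are taken from the code in this patch. */
    typedef struct elevator_s {
            int read_latency;       /* insertion/merge budget for READs */
            int write_latency;      /* insertion/merge budget for WRITEs */
            int max_bomb_segments;  /* write-size clamp while reads are queued */
            unsigned int queue_ID;  /* identifier reported via BLKELVGET */
    } elevator_t;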
@@ -291,6 +304,197 @@
printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n");
}
+static int blkelvget_ioctl(elevator_t * elevator, blkelv_ioctl_arg_t * arg)
+{
+ int ret;
+ blkelv_ioctl_arg_t output;
+
+ output.queue_ID = elevator->queue_ID;
+ output.read_latency = elevator->read_latency;
+ output.write_latency = elevator->write_latency;
+ output.max_bomb_segments = elevator->max_bomb_segments;
+
+ ret = -EFAULT;
+ if (copy_to_user(arg, &output, sizeof(blkelv_ioctl_arg_t)))
+ goto out;
+ ret = 0;
+ out:
+ return ret;
+}
+
+static int blkelvset_ioctl(elevator_t * elevator, const blkelv_ioctl_arg_t * arg)
+{
+ blkelv_ioctl_arg_t input;
+ int ret;
+
+ ret = -EFAULT;
+ if (copy_from_user(&input, arg, sizeof(blkelv_ioctl_arg_t)))
+ goto out;
+
+ ret = -EINVAL;
+ if (input.read_latency < 0)
+ goto out;
+ if (input.write_latency < 0)
+ goto out;
+ if (input.max_bomb_segments <= 0)
+ goto out;
+
+ elevator->read_latency = input.read_latency;
+ elevator->write_latency = input.write_latency;
+ elevator->max_bomb_segments = input.max_bomb_segments;
+
+ ret = 0;
+ out:
+ return ret;
+}
+
+int blkelv_ioctl(kdev_t dev, unsigned long cmd, unsigned long arg)
+{
+ elevator_t * elevator = &blk_dev[MAJOR(dev)].elevator;
+ blkelv_ioctl_arg_t * __arg = (blkelv_ioctl_arg_t *) arg;
+
+ switch (cmd) {
+ case BLKELVGET:
+ return blkelvget_ioctl(elevator, __arg);
+ case BLKELVSET:
+ return blkelvset_ioctl(elevator, __arg);
+ }
+ return -EINVAL;
+}
+
+static inline int seek_to_not_starving_chunk(struct request ** req, int * lat)
+{
+ struct request * tmp = *req;
+ int found = 0, pos = 0;
+ int last_pos = 0, __lat = *lat;
+
+ do {
+ if (tmp->elevator_latency <= 0)
+ {
+ *req = tmp;
+ found = 1;
+ last_pos = pos;
+ if (last_pos >= __lat)
+ break;
+ }
+ pos += tmp->nr_segments;
+ } while ((tmp = tmp->next));
+ *lat -= last_pos;
+
+ return found;
+}
+
+#define CASE_COALESCE_BUT_FIRST_REQUEST_MAYBE_BUSY \
+ case IDE0_MAJOR: /* same as HD_MAJOR */ \
+ case IDE1_MAJOR: \
+ case FLOPPY_MAJOR: \
+ case IDE2_MAJOR: \
+ case IDE3_MAJOR: \
+ case IDE4_MAJOR: \
+ case IDE5_MAJOR: \
+ case ACSI_MAJOR: \
+ case MFM_ACORN_MAJOR: \
+ case MDISK_MAJOR: \
+ case DASD_MAJOR:
+#define CASE_COALESCE_ALSO_FIRST_REQUEST \
+ case SCSI_DISK0_MAJOR: \
+ case SCSI_DISK1_MAJOR: \
+ case SCSI_DISK2_MAJOR: \
+ case SCSI_DISK3_MAJOR: \
+ case SCSI_DISK4_MAJOR: \
+ case SCSI_DISK5_MAJOR: \
+ case SCSI_DISK6_MAJOR: \
+ case SCSI_DISK7_MAJOR: \
+ case SCSI_CDROM_MAJOR: \
+ case DAC960_MAJOR+0: \
+ case DAC960_MAJOR+1: \
+ case DAC960_MAJOR+2: \
+ case DAC960_MAJOR+3: \
+ case DAC960_MAJOR+4: \
+ case DAC960_MAJOR+5: \
+ case DAC960_MAJOR+6: \
+ case DAC960_MAJOR+7: \
+ case COMPAQ_SMART2_MAJOR+0: \
+ case COMPAQ_SMART2_MAJOR+1: \
+ case COMPAQ_SMART2_MAJOR+2: \
+ case COMPAQ_SMART2_MAJOR+3: \
+ case COMPAQ_SMART2_MAJOR+4: \
+ case COMPAQ_SMART2_MAJOR+5: \
+ case COMPAQ_SMART2_MAJOR+6: \
+ case COMPAQ_SMART2_MAJOR+7:
+
+#define elevator_starve_rest_of_queue(req) \
+do { \
+ struct request * tmp = (req); \
+ for ((tmp) = (tmp)->next; (tmp); (tmp) = (tmp)->next) \
+ (tmp)->elevator_latency--; \
+} while (0)
+
+static inline void elevator_queue(struct request * req,
+ struct request * tmp,
+ int latency,
+ struct blk_dev_struct * dev,
+ struct request ** queue_head)
+{
+ struct request * __tmp;
+ int starving, __latency;
+
+ starving = seek_to_not_starving_chunk(&tmp, &latency);
+ __tmp = tmp;
+ __latency = latency;
+
+ for (;; tmp = tmp->next)
+ {
+ if ((latency -= tmp->nr_segments) <= 0)
+ {
+ tmp = __tmp;
+ latency = __latency - tmp->nr_segments;
+
+ if (starving)
+ break;
+
+ switch (MAJOR(req->rq_dev))
+ {
+ CASE_COALESCE_BUT_FIRST_REQUEST_MAYBE_BUSY
+ if (tmp == dev->current_request)
+ default:
+ goto link;
+ CASE_COALESCE_ALSO_FIRST_REQUEST
+ }
+
+ latency += tmp->nr_segments;
+ req->next = tmp;
+ *queue_head = req;
+ goto after_link;
+ }
+
+ if (!tmp->next)
+ break;
+
+ {
+ const int after_current = IN_ORDER(tmp,req);
+ const int before_next = IN_ORDER(req,tmp->next);
+
+ if (!IN_ORDER(tmp,tmp->next)) {
+ if (after_current || before_next)
+ break;
+ } else {
+ if (after_current && before_next)
+ break;
+ }
+ }
+ }
+
+ link:
+ req->next = tmp->next;
+ tmp->next = req;
+
+ after_link:
+ req->elevator_latency = latency;
+
+ elevator_starve_rest_of_queue(req);
+}
+
/*
* add-request adds a request to the linked list.
 * It disables interrupts (acquires the request spinlock) so that it can muck
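The three functions at the top of this hunk expose the elevator parameters to userspace, one queue per major. Below is a minimal sketch of how a tool in the spirit of elvtune might drive them; it assumes blkelv_ioctl_arg_t, BLKELVGET and BLKELVSET reach userspace through <linux/blkdev.h> as in the 2.2 headers, and /dev/hda is only an example device:

    /* elvtune-style sketch: query a queue's elevator parameters and
     * double its write-bomb clamp.  Error handling kept minimal. */
    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/blkdev.h>       /* blkelv_ioctl_arg_t, BLKELVGET/SET */

    int main(void)
    {
            blkelv_ioctl_arg_t arg;
            int fd = open("/dev/hda", O_RDONLY);

            if (fd < 0 || ioctl(fd, BLKELVGET, &arg) < 0) {
                    perror("BLKELVGET");
                    return 1;
            }
            printf("queue %u: read_latency=%d write_latency=%d "
                   "max_bomb_segments=%d\n", arg.queue_ID,
                   arg.read_latency, arg.write_latency,
                   arg.max_bomb_segments);

            arg.max_bomb_segments *= 2;     /* example tweak only */
            if (ioctl(fd, BLKELVSET, &arg) < 0)
                    perror("BLKELVSET");
            close(fd);
            return 0;
    }

Larger latencies and bomb sizes favour throughput through more coalescing; smaller values favour fairness and interactive reads.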
@@ -309,31 +513,38 @@
short disk_index;
unsigned long flags;
int queue_new_request = 0;
+ int latency;
switch (major) {
case DAC960_MAJOR+0:
disk_index = (minor & 0x00f8) >> 3;
- if (disk_index < 4)
- drive_stat_acct(req->cmd, req->nr_sectors, disk_index);
break;
case SCSI_DISK0_MAJOR:
+ case COMPAQ_SMART2_MAJOR+0:
+ case COMPAQ_SMART2_MAJOR+1:
+ case COMPAQ_SMART2_MAJOR+2:
+ case COMPAQ_SMART2_MAJOR+3:
+ case COMPAQ_SMART2_MAJOR+4:
+ case COMPAQ_SMART2_MAJOR+5:
+ case COMPAQ_SMART2_MAJOR+6:
+ case COMPAQ_SMART2_MAJOR+7:
disk_index = (minor & 0x00f0) >> 4;
- if (disk_index < 4)
- drive_stat_acct(req->cmd, req->nr_sectors, disk_index);
break;
case IDE0_MAJOR: /* same as HD_MAJOR */
case XT_DISK_MAJOR:
disk_index = (minor & 0x0040) >> 6;
- drive_stat_acct(req->cmd, req->nr_sectors, disk_index);
break;
case IDE1_MAJOR:
disk_index = ((minor & 0x0040) >> 6) + 2;
- drive_stat_acct(req->cmd, req->nr_sectors, disk_index);
+ break;
default:
+ disk_index = -1;
break;
}
+ if (disk_index >= 0 && disk_index < 4)
+ drive_stat_acct(req->cmd, req->nr_sectors, disk_index);
- req->next = NULL;
+ latency = get_request_latency(&dev->elevator, req->cmd);
/*
* We use the goto to reduce locking complexity
@@ -344,25 +555,14 @@
if (req->bh)
mark_buffer_clean(req->bh);
if (!(tmp = *current_request)) {
+ req->next = NULL;
+ req->elevator_latency = latency;
*current_request = req;
if (dev->current_request != &dev->plug)
queue_new_request = 1;
goto out;
}
- for ( ; tmp->next ; tmp = tmp->next) {
- const int after_current = IN_ORDER(tmp,req);
- const int before_next = IN_ORDER(req,tmp->next);
-
- if (!IN_ORDER(tmp,tmp->next)) {
- if (after_current || before_next)
- break;
- } else {
- if (after_current && before_next)
- break;
- }
- }
- req->next = tmp->next;
- tmp->next = req;
+ elevator_queue(req, tmp, latency, dev, current_request);
/* for SCSI devices, call request_fn unconditionally */
if (scsi_blk_major(major) ||
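The loop deleted above was the classic in-order elevator insertion; it now lives inside elevator_queue(), which performs the same IN_ORDER() walk but gives up once the latency budget is spent, so old requests cannot be passed over forever. Roughly, and hedged because the real macro lives in drivers/block/blk.h and may additionally compare req->cmd, IN_ORDER() amounts to:

    /* Rough restatement of IN_ORDER() from drivers/block/blk.h: sort by
     * device first, then by ascending start sector, so the head sweeps
     * in one direction.  Sketch only; see blk.h for the real macro. */
    static inline int in_order(struct request *s1, struct request *s2)
    {
            if (s1->rq_dev != s2->rq_dev)
                    return s1->rq_dev < s2->rq_dev;
            return s1->sector < s2->sector;
    }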
@@ -399,6 +599,8 @@
total_segments--;
if (total_segments > max_segments)
return;
+ if (next->elevator_latency < req->elevator_latency)
+ req->elevator_latency = next->elevator_latency;
req->bhtail->b_reqnext = next->bh;
req->bhtail = next->bhtail;
req->nr_sectors += next->nr_sectors;
@@ -408,12 +610,28 @@
wake_up (&wait_for_request);
}
+#define read_pendings(req) \
+({ \
+ int __ret = 0; \
+ struct request * tmp = (req); \
+ do { \
+ if (tmp->cmd == READ) \
+ { \
+ __ret = 1; \
+ break; \
+ } \
+ tmp = tmp->next; \
+ } while (tmp); \
+ __ret; \
+})
+
void make_request(int major, int rw, struct buffer_head * bh)
{
unsigned int sector, count;
- struct request * req;
+ struct request * req, * prev;
int rw_ahead, max_req, max_sectors, max_segments;
unsigned long flags;
+ int latency, starving;
count = bh->b_size >> 9;
sector = bh->b_rsector;
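read_pendings() is what arms the write-bomb clamp used later in make_request(): while any READ sits in the queue, a WRITE may only grow to max_bomb_segments instead of the hardware limit. Restated as a hypothetical helper (not in the patch):

    /* Hypothetical restatement of the cap selection in make_request()
     * below: writes are throttled while reads are waiting, so a stream
     * of writes cannot monopolize the queue. */
    static inline int merge_segment_cap(int rw, struct request *queue,
                                        elevator_t *elevator,
                                        int max_segments)
    {
            if (rw != READ && read_pendings(queue))
                    return elevator->max_bomb_segments;
            return max_segments;
    }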
@@ -490,6 +708,8 @@
max_sectors = get_max_sectors(bh->b_rdev);
max_segments = get_max_segments(bh->b_rdev);
+ latency = get_request_latency(&blk_dev[major].elevator, rw);
+
/*
* Now we acquire the request spinlock, we have to be mega careful
* not to schedule or do something nonatomic
@@ -502,17 +722,7 @@
major != DDV_MAJOR && major != NBD_MAJOR)
plug_device(blk_dev + major); /* is atomic */
} else switch (major) {
- case IDE0_MAJOR: /* same as HD_MAJOR */
- case IDE1_MAJOR:
- case FLOPPY_MAJOR:
- case IDE2_MAJOR:
- case IDE3_MAJOR:
- case IDE4_MAJOR:
- case IDE5_MAJOR:
- case ACSI_MAJOR:
- case MFM_ACORN_MAJOR:
- case MDISK_MAJOR:
- case DASD_MAJOR:
+ CASE_COALESCE_BUT_FIRST_REQUEST_MAYBE_BUSY
/*
* The scsi disk and cdrom drivers completely remove the request
* from the queue when they start processing an entry. For this
@@ -523,37 +733,20 @@
* entry may be busy being processed and we thus can't change it.
*/
if (req == blk_dev[major].current_request)
- req = req->next;
- if (!req)
- break;
+ {
+ if (!(req = req->next))
+ break;
+ latency -= req->nr_segments;
+ }
/* fall through */
+ CASE_COALESCE_ALSO_FIRST_REQUEST
- case SCSI_DISK0_MAJOR:
- case SCSI_DISK1_MAJOR:
- case SCSI_DISK2_MAJOR:
- case SCSI_DISK3_MAJOR:
- case SCSI_DISK4_MAJOR:
- case SCSI_DISK5_MAJOR:
- case SCSI_DISK6_MAJOR:
- case SCSI_DISK7_MAJOR:
- case SCSI_CDROM_MAJOR:
- case DAC960_MAJOR+0:
- case DAC960_MAJOR+1:
- case DAC960_MAJOR+2:
- case DAC960_MAJOR+3:
- case DAC960_MAJOR+4:
- case DAC960_MAJOR+5:
- case DAC960_MAJOR+6:
- case DAC960_MAJOR+7:
- case COMPAQ_SMART2_MAJOR+0:
- case COMPAQ_SMART2_MAJOR+1:
- case COMPAQ_SMART2_MAJOR+2:
- case COMPAQ_SMART2_MAJOR+3:
- case COMPAQ_SMART2_MAJOR+4:
- case COMPAQ_SMART2_MAJOR+5:
- case COMPAQ_SMART2_MAJOR+6:
- case COMPAQ_SMART2_MAJOR+7:
+ /* avoid write-bombs, so as not to hurt the interactivity of reads */
+ if (rw != READ && read_pendings(req))
+ max_segments = blk_dev[major].elevator.max_bomb_segments;
+ starving = seek_to_not_starving_chunk(&req, &latency);
+ prev = NULL;
do {
if (req->sem)
continue;
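The two CASE_COALESCE_* macros encode one distinction: IDE, floppy and similar drivers leave the in-flight request linked at the head of the queue, so the first entry must not be touched, while the SCSI, DAC960 and SMART2 drivers dequeue a request before processing it, making even the head mergeable. A hedged sketch of the same split as a predicate (the helper name is invented here):

    /* Hypothetical predicate over the CASE_COALESCE_* macros above:
     * returns 1 for majors whose head request may be busy inside the
     * driver and therefore must be skipped by the elevator. */
    static inline int first_request_may_be_busy(int major)
    {
            switch (major) {
            CASE_COALESCE_BUT_FIRST_REQUEST_MAYBE_BUSY
                    return 1;
            }
            return 0;
    }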
@@ -565,24 +758,34 @@
continue;
/* Can we add it to the end of this request? */
if (req->sector + req->nr_sectors == sector) {
+ if (latency - req->nr_segments < 0)
+ break;
if (req->bhtail->b_data + req->bhtail->b_size
!= bh->b_data) {
if (req->nr_segments < max_segments)
req->nr_segments++;
- else continue;
+ else break;
}
req->bhtail->b_reqnext = bh;
req->bhtail = bh;
req->nr_sectors += count;
+
+ /* latency stuff */
+ if ((latency -= req->nr_segments) < req->elevator_latency)
+ req->elevator_latency = latency;
+ elevator_starve_rest_of_queue(req);
+
/* Can we now merge this req with the next? */
attempt_merge(req, max_sectors, max_segments);
/* or to the beginning? */
} else if (req->sector - count == sector) {
+ if (!prev && starving)
+ break;
if (bh->b_data + bh->b_size
!= req->bh->b_data) {
if (req->nr_segments < max_segments)
req->nr_segments++;
- else continue;
+ else break;
}
bh->b_reqnext = req->bh;
req->bh = bh;
@@ -590,6 +793,14 @@
req->current_nr_sectors = count;
req->sector = sector;
req->nr_sectors += count;
+
+ /* latency stuff */
+ if (latency < --req->elevator_latency)
+ req->elevator_latency = latency;
+ elevator_starve_rest_of_queue(req);
+
+ if (prev)
+ attempt_merge(prev, max_sectors, max_segments);
} else
continue;
@@ -597,7 +808,8 @@
spin_unlock_irqrestore(&io_request_lock,flags);
return;
- } while ((req = req->next) != NULL);
+ } while (prev = req,
+ (latency -= req->nr_segments) >= 0 && (req = req->next) != NULL);
}
/* find an unused request. */
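The (latency -= req->nr_segments) >= 0 clause in the loop condition above is the heart of the latency bound: every request the scan walks past consumes part of the budget, and once it is spent the buffer stops hunting for merge partners and is queued as a fresh request. A self-contained toy model, with illustrative numbers rather than kernel defaults:

    /* Toy model of the merge-scan budget in make_request(); numbers are
     * illustrative, not kernel defaults.  Builds with any C compiler. */
    #include <stdio.h>

    int main(void)
    {
            int latency = 64;       /* hypothetical read_latency budget */
            int nr_segments[] = { 16, 16, 32, 64, 16 };
            int n = sizeof(nr_segments) / sizeof(nr_segments[0]);
            int i;

            for (i = 0; i < n && latency >= 0; i++) {
                    printf("request %d is a merge candidate (budget %d)\n",
                           i, latency);
                    latency -= nr_segments[i];
            }
            printf("budget spent after %d requests; new I/O gets its own "
                   "request\n", i);
            return 0;
    }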
@@ -623,7 +835,6 @@
req->sem = NULL;
req->bh = bh;
req->bhtail = bh;
- req->next = NULL;
add_request(major+blk_dev,req);
return;
@@ -781,6 +992,7 @@
{
struct request * req;
struct blk_dev_struct *dev;
+ static unsigned int queue_ID;
for (dev = blk_dev + MAX_BLKDEV; dev-- != blk_dev;) {
dev->request_fn = NULL;
@@ -792,12 +1004,13 @@
dev->plug_tq.sync = 0;
dev->plug_tq.routine = &unplug_device;
dev->plug_tq.data = dev;
+ dev->elevator = ELEVATOR_DEFAULTS;
+ dev->elevator.queue_ID = queue_ID++;
}
req = all_requests + NR_REQUEST;
while (--req >= all_requests) {
req->rq_status = RQ_INACTIVE;
- req->next = NULL;
}
memset(ro_bits,0,sizeof(ro_bits));
memset(max_readahead, 0, sizeof(max_readahead));
@@ -902,9 +1115,13 @@
#ifdef CONFIG_DASD
dasd_init();
#endif
+#ifdef CONFIG_BLK_DEV_XPRAM
+ xpram_init();
+#endif
return 0;
};
EXPORT_SYMBOL(io_request_lock);
EXPORT_SYMBOL(end_that_request_first);
EXPORT_SYMBOL(end_that_request_last);
+EXPORT_SYMBOL(blkelv_ioctl);