When using badblocks to test HDDs on the nodes, one normally uses
$ badblocks -v -p10 /dev/sdb
which checks the disk. If it makes 10 clean runs in a row, it will exit. But if the disk is really damaged, it will run forever.

With this patch, a new option "-m most_runs" is added, so that
$ badblocks -v -p10 -m 2 /dev/sdb
will run the test at most 2 times in any case.

see also bug #241

This is actually a patch for e2fsprogs. It patches misc/badblocks.c and misc/badblocks.8.in, so it changes both the badblocks program and its man page.

From 81f2a4661b088d5603bcc542d7dfc5b19058c9bc Mon Sep 17 00:00:00 2001
From: =?utf-8?q?Henrik=20Kr=C3=B6ger?= <henrikkroeger@googlemail.com>
Date: Thu, 16 Oct 2008 17:39:41 +0200
Subject: [PATCH] added option -m most_runs to badblocks

By using -m most_runs you can use -p num_passes without the problem that
the scanning will continue forever on a broken disk.
With -m most_runs, the scanning will cancel after most_runs of tests.
---
 misc/badblocks.8.in |    8 ++++++++
 misc/badblocks.c    |   13 ++++++++++---
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/misc/badblocks.8.in b/misc/badblocks.8.in
index 5a10f8a..23ceab9 100644
--- a/misc/badblocks.8.in
+++ b/misc/badblocks.8.in
@@ -36,6 +36,10 @@ badblocks \- search a device for bad blocks
 .I num_passes
 ]
 [
+.B \-m
+.I most_runs
+]
+[
 .B \-t
 .I test_pattern
 ]
@@ -150,6 +154,10 @@ Default is 0, meaning
 .B badblocks
 will exit after the first pass.
 .TP
+.BI \-m " most_runs"
+Cancel scanning after given number of scans.
+Default is 0, meaning never cancel scanning.
+.TP
 .BI \-t " test_pattern"
 Specify a test pattern to be read (and written) to disk blocks.   The
 .I test_pattern
diff --git a/misc/badblocks.c b/misc/badblocks.c
index f7d67a7..2b81963 100644
--- a/misc/badblocks.c
+++ b/misc/badblocks.c
@@ -88,7 +88,7 @@ static void usage(void)
    fprintf(stderr, _(
 "Usage: %s [-b block_size] [-i input_file] [-o output_file] [-svwnf]\n"
 "       [-c blocks_at_once] [-d delay_factor_between_reads] [-e max_bad_blocks]\n"
-"       [-p num_passes] [-t test_pattern [-t test_pattern [...]]]\n"
+"       [-p num_passes] [-m most_runs] [-t test_pattern [-t test_pattern [...]]]\n"
 "       device [last_block [first_block]]\n"),
        program_name);
    exit (1);
@@ -972,6 +972,8 @@ int main (int argc, char ** argv)
    blk_t last_block, first_block;
    int num_passes = 0;
    int passes_clean = 0;
+   int most_runs = 0;
+   int runs_done = 0;
    int dev;
    errcode_t errcode;
    unsigned int pattern;
@@ -1006,7 +1008,7 @@ int main (int argc, char ** argv)
 
    if (argc && *argv)
       program_name = *argv;
-   while ((c = getopt (argc, argv, "b:d:e:fi:o:svwnc:p:h:t:X")) != EOF) {
+   while ((c = getopt (argc, argv, "b:d:e:fi:o:svwnc:p:m:h:t:X")) != EOF) {
       switch (c) {
       case 'b':
          block_size = parse_uint(optarg, "block size");
@@ -1056,6 +1058,10 @@ int main (int argc, char ** argv)
          num_passes = parse_uint(optarg,
                   "number of clean passes");
          break;
+      case 'm':
+         most_runs = parse_uint(optarg,
+                  "maximum number of runs");
+         break;
       case 'h':
          host_device_name = optarg;
          break;
@@ -1226,13 +1232,14 @@ int main (int argc, char ** argv)
          passes_clean = 0;
       else
          ++passes_clean;
+      runs_done++;
 
       if (v_flag)
          fprintf(stderr,
             _("Pass completed, %u bad blocks found.\n"),
             bb_count);
 
-   } while (passes_clean < num_passes);
+   } while (passes_clean < num_passes && (most_runs < 1 || runs_done < most_runs));
 
    close (dev);
    if (out != stdout)
-- 
1.5.6.5

For the etch version of badblocks, another patch is necessary:

From 1e9a75e36541fa90fc1005e098428f7aa7cf6ea6 Mon Sep 17 00:00:00 2001
From: Henrik Kroeger <henrik@postfixserver>
Date: Mon, 27 Oct 2008 21:38:32 +0100
Subject: [PATCH] added option -m most_runs to badblocks
 
When using the "-p num_passes" for badblocks the scanning will continue
almost forever on a really bad disk. Especially in batch-mode this might
not be ideal.
By using -m most_runs you can use -p num_passes without the problem that
the scanning will continue forever on a broken disk.
With -m most_runs, the scanning will cancel after most_runs of tests.
---
 misc/badblocks.8.in |    8 ++++++++
 misc/badblocks.c    |   18 ++++++++++++++++--
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/misc/badblocks.8.in b/misc/badblocks.8.in
index 1719db2..74d48a7 100644
--- a/misc/badblocks.8.in
+++ b/misc/badblocks.8.in
@@ -28,6 +28,10 @@ badblocks \- search a device for bad blocks
 .I num_passes
 ]
 [
+.B \-m
+.I most_runs
+]
+[
 .B \-t
 .I test_pattern
 ]
@@ -129,6 +133,10 @@ Default is 0, meaning
 .B badblocks
 will exit after the first pass.
 .TP
+.BI \-m " most_runs"
+Cancel scanning after given number of scans.
+Default is 0, meaning never cancel scanning.
+.TP
 .BI \-t " test_pattern"
 Specify a test pattern to be read (and written) to disk blocks.   The
 .I test_pattern
diff --git a/misc/badblocks.c b/misc/badblocks.c
index 9867668..74fd53c 100644
--- a/misc/badblocks.c
+++ b/misc/badblocks.c
@@ -76,7 +76,7 @@ int sys_page_size = 4096;

 static void usage(void)
 {
-       fprintf(stderr, _("Usage: %s [-b block_size] [-i input_file] [-o output_file] [-svwnf]\n [-c blocks_at_once] [-p num_passes] [-t test_pattern [-t test_pattern [...]]]\n device [last_block [start_block]]\n"),
+       fprintf(stderr, _("Usage: %s [-b block_size] [-i input_file] [-o output_file] [-svwnf]\n [-c blocks_at_once] [-p num_passes] [-m most_runs] [-t test_pattern [-t test_pattern [...]]]\n device [last_block [start_block]]\n"),
                 program_name);
        exit (1);
 }
@@ -825,6 +825,8 @@ int main (int argc, char ** argv)
        unsigned long blocks_at_once = 64;
        blk_t last_block, from_count;
        int num_passes = 0;
+       int most_runs = 0;
+       int done_runs = 0;
        int passes_clean = 0;
        int dev;
        errcode_t errcode;
@@ -860,7 +862,7 @@ int main (int argc, char ** argv)
        
        if (argc && *argv)
                program_name = *argv;
-       while ((c = getopt (argc, argv, "b:fi:o:svwnc:p:h:t:X")) != EOF) {
+       while ((c = getopt (argc, argv, "b:fi:o:svwnc:p:m:h:t:X")) != EOF) {
                switch (c) {
                case 'b':
                        block_size = strtoul (optarg, &tmp, 0);
@@ -913,6 +915,14 @@ int main (int argc, char ** argv)
                                exit (1);
                        }
                        break;
+                case 'm':
+                       most_runs = strtoul (optarg, &tmp, 0);
+                       if (*tmp) {
+                               com_err (program_name, 0,
+                                       "bad number of most runs - %s", optarg);
+                               exit (1);
+                       }
+                       break;
                case 'h':
                        host_device_name = optarg;
                        break;
@@ -1100,6 +1110,10 @@ int main (int argc, char ** argv)
                                _("Pass completed, %u bad blocks found.\n"), 
                                bb_count);
 
+               ++done_runs;
+               if( most_runs > 0 && done_runs >= most_runs )
+                       break;
+
        } while (passes_clean < num_passes);
 
        close (dev);
-- 
1.5.5.GIT

-- HenrikKroeger - 05 Nov 2008

DocumentationForm edit

Title How to patch the bad blocks script
Description This page shows how to patch the badblocks program so that it does not run forever
Tags bad blocks patch
Category Admin
This topic: ATLAS > WebHome > GeneralDocumentation > BadblocksPatch
Topic revision: 10 Feb 2012, ArthurVarkentin
This site is powered by Foswiki. Copyright © by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding Foswiki? Send feedback