Incremental back up of Windows XP to ZFS

I am forced to have a Windows system at home which, thankfully, only very occasionally gets used. However, even though everything that gets on it is virus scanned, all email is scanned before it gets near it, and none of the users are administrators, I still like to keep it backed up.

Given I have a server on the network which has ZFS file systems with spare capacity, I decided that I could do this just using the dd(1) command, which I have written about before. Using that to copy the entire disk image to a ZFS file allows me to back the system up. However, if I snapshot the backup file system and then back up again, every block gets rewritten and so takes up space on the server even if it has not changed (roll on dedup). To stop this I have a tiny program that mmap()s the entire backup file and then only updates the blocks that have changed.

I call it syncer for no good reason:

#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#include <sys/mman.h>
#include <stdio.h>
#include <sys/time.h>
/*
 * Build by:
 *              cc -m64 -o syncer syncer.c
 */

/*
 * Fallback comparison/copy unit, used when the backup file reports no
 * preferred block size. Match this to the file system record size.
 */
#define BLOCK_SIZE (128 * 1024)
#define KILO 1024
#define MEG (KILO * KILO)

/*
 * Scale factors applied to the nanosecond timestamps from gethrtime().
 * NOTE: despite their names these are ratios, not units:
 *      MSEC = 1,000            nanoseconds per microsecond
 *      NSEC = 1,000,000        nanoseconds per millisecond
 *      USEC = 1,000,000,000    nanoseconds per second
 * The names are kept because main() refers to them.
 */
#define MSEC (1000LL)
#define NSEC (MSEC * MSEC)
#define USEC (NSEC * MSEC)

/*
 * Preferred I/O block size of the backup file (st_blksize), recorded by
 * map_file(). Zero until map_file() has successfully stat'ed the file.
 */
static long block_size;

/*
 * Map the whole of an existing file into memory, read/write and shared, so
 * that stores through the returned pointer modify the file itself.
 *
 * As a side effect the file's preferred I/O block size (st_blksize) is
 * stored in the file-scope variable block_size.
 *
 * Returns the base address of the mapping, or NULL if the file cannot be
 * opened, stat'ed or mapped; the reason is reported on stderr.
 */
char *
map_file(const char *file)
{
        int fd;
        void *addr;
        struct stat buf;

        if ((fd = open(file, O_RDWR)) == -1) {
                perror(file);
                return (NULL);
        }

        if (fstat(fd, &buf) == -1) {
                perror(file);
                close(fd);
                return (NULL);
        }

        block_size = buf.st_blksize;

        addr = mmap(NULL, buf.st_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
        if (addr == MAP_FAILED) {
                /*
                 * mmap() signals failure with MAP_FAILED, not NULL; convert
                 * it so the caller's NULL check actually catches the error.
                 */
                perror("mmap");
                addr = NULL;
        }
        /* The mapping stays valid after the descriptor is closed. */
        close(fd);
        return (addr);
}
/*
 * Read from fd until len bytes have been stored in buf, or until end of
 * file or a read error. Short reads (normal on pipes) are retried so the
 * caller sees full blocks wherever the input allows.
 *
 * Returns the number of bytes actually read, which is less than len only
 * at end of file or after an error; a read error is reported on stderr.
 */
off64_t
read_whole(int fd, char *buf, int len)
{
        int total = 0;

        while (total < len) {
                /* Read from the descriptor passed in, not a hard-coded 0. */
                ssize_t got = read(fd, &buf[total], len - total);

                if (got == -1) {
                        perror("read");
                        break;
                }
                if (got == 0) {
                        break;          /* end of file */
                }
                total += got;
        }
        return (total);
}
/*
 * Print a label followed by a byte count scaled to a readable unit:
 * plain bytes below 1K, whole kilobytes ("K") below 1M, and whole
 * megabytes ("M") otherwise. No newline is written.
 */
static void
print_amount(const char *str, off64_t value)
{
        /* %lld needs a long long; off64_t is not guaranteed to be one. */
        long long amount = (long long)value;

        if (amount < KILO) {
                printf("%s %8lld ", str, amount);
        } else if (amount < MEG) {
                printf("%s %8lldK", str, amount / KILO);
        } else {
                printf("%s %8lldM", str, amount / MEG);
        }
}
int
main(int argc, char \*\*argv)
{
        char \*buf;
        off64_t offset = 0;
        off64_t update = 0;
        off64_t count;
        off64_t tcount = 0;
        char \*addr;
        long bs;
        hrtime_t starttime;
        hrtime_t lasttime;

        if (argc == 1) {
                fprintf(stderr, "Usage: %s outfile\\n", \*argv);
                exit(1);
        }
        if ((addr = map_file(argv[1])) == NULL) {
                exit(1);
        }
        bs = block_size == 0 ? BLOCK_SIZE : block_size;
        if ((buf = malloc(block_size == 0 ? BLOCK_SIZE : block_size)) == NULL) {
                perror("malloc failed");
                exit(1);
        }

        print_amount("Block size:", bs);
        printf("\\n");
        fflush(stdout);

        starttime = lasttime = gethrtime();
        while ((count = read_whole(0, buf, bs)) > 0) {
                hrtime_t thistime;
                if (memcmp(buf, addr+offset, count) != 0) {
                        memcpy(addr+offset, buf, count);
                        update+=count;
                }
                madvise(addr+offset, count, MADV_DONTNEED);
                offset+=count;
                madvise(addr+offset, bs, MADV_WILLNEED);
                thistime = gethrtime();
                /\*
                 \* Only update the output after a second so that is readable.
                 \*/
                if (thistime - lasttime > USEC) {
                        print_amount("checked", offset);
                        printf(" %4d M/sec ", ((hrtime_t)tcount \* USEC) /
                                (MEG \* (thistime - lasttime)));
                        print_amount(" updated", update);
                        printf("\\r");
                        fflush(stdout);
                        lasttime = thistime;
                        tcount = 0;
                } else { 
                        tcount += count;
                }
        }
        printf("                                            \\r");
        print_amount("Read: ", offset);
        printf(" %lld M/sec ", (offset \* NSEC) /
                (MEG \* ((gethrtime() - starttime)/MSEC)));
        print_amount("Updated:", update);
        printf("\\n");
        /\* If nothing is updated return false \*/
        exit(update == 0 ? 1 : 0);
}



Then a simple shell function to do the back up and then snapshot the file system:

# Stream the PC's disk image over ssh into syncer, which updates only the
# changed blocks of the backup file. syncer exits non-zero when nothing was
# updated, so the dated ZFS snapshot is only taken when the image changed.
function backuppc
{
	ssh -o Compression=no -c blowfish pc pfexec /usr/local/sbin/xp_backup | time ~/lang/c/syncer /tank/backup/pc/backup.dd && \
	pfexec /usr/sbin/zfs snapshot "tank/backup/pc@$(date +%F)"
}

Running it I see that only 2.5G of data was actually written to disk, and yet thanks to ZFS I have a complete disk image and have not lost the previous disk images.


: pearson FSS 17 $; backuppc
665804+0 records in
665804+0 records out
Read:     20481M 9 M/sec Updated:     2584M 

real    35m50.00s
user    6m27.98s
sys     2m43.76s
: pearson FSS 18 $; 
Comments:

I backup my files and emails with a very easy to use software called Titan Backup, the destination is a 500gb WD harddisk, but the company stated that an online feature is under work also. They have a 15 day free trial on their website.

I also got my hands on a 30% discount coupon from them, during the order process enter this coupon:NEOB-M5VL

Posted by Alec Reynolds on October 07, 2008 at 03:31 AM BST #

Since I have no data on the Windows PC at all I only need to back up the system files so dd works well for me. All the real data is held on a mirrored ZFS pool and is snapshotted up every 10 minutes. Plus I also back that data up to an external drive once a day.

Posted by Chris Gerhard on October 07, 2008 at 03:47 AM BST #

Post a Comment:
Comments are closed for this entry.
About

This is the old blog of Chris Gerhard. It has mostly moved to http://chrisgerhard.wordpress.com

Search

Archives
« April 2014
MonTueWedThuFriSatSun
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
    
       
Today