C言語でファイルをコピーする(マルチスレッド&ダイレクトI/O編)

※2012年8月9日、追記。普通のファイルコピーのソースが見たい場合はcopybenchのソースが参考になると思う。このページの内容はやや古くなっているうえ、ソースコードの質が低いので注意。

そこに至る経緯。

  1. mmapが速いらしいと知ってググる
  2. ファイルコピーだとmmapよりread/writeの方が速い、という事を知る。
  3. copybench-1.0をちょっと弄ってO_DIRECT使うようにしたら読み書きの速度が2/3程度にまで落ちた。
  4. 試しにreadだけやったらごっつ速い(つまりwriteが遅い?)
  5. 何となくpthread使ったら速度改善(゚∀゚)

シングルスレッドだと遅いのかな?よく分からん。

参考:C言語: UNIX最速ファイルコピー

参考:C言語: write(2)の正しい使い方

関連:C言語でフォルダ(ディレクトリ)を丸ごとコピーする

/*
gcc -Wall -std=c99 -O -pthread -o cp_thread_dio cp_thread_dio.c `pkg-config --cflags --libs glib-2.0`
*/

#define _XOPEN_SOURCE 600
#define _GNU_SOURCE
#define _FILE_OFFSET_BITS 64
#define _LARGEFILE64_SOURCE

#define IO_BUF 16777216
#define PATH_LEN 8192

#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
#include <utime.h>

#include <glib.h>
#include <pthread.h>

/* Buffers used to read and write the file; the two are used alternately. */
char *buf1;
char *buf2;
/* Byte counts returned by the most recent read into buf1 / buf2. */
int rcount_1 = -1;
int rcount_2 = -1;
/* Byte counts returned by the most recent write from buf1 / buf2. */
int wcount_1 = -1;
int wcount_2 = -1;
/*
 * Offset in the destination file at which the next chunk is written.
 * Declared as off_t (not int) so that it does not overflow once the copy
 * passes 2 GiB.
 */
off_t woffset = 0;

/*
 * Thread entry point: read up to IO_BUF bytes into buf1.
 *
 * read_tmp points to an int holding the file descriptor to read from.
 * The byte count returned by read() is stored in rcount_1.  A read error
 * is reported with perror() and terminates the whole process.
 * Always returns NULL.
 */
void * r_func1(void *read_tmp)
{
  const int src_fd = *(int *)read_tmp;

  rcount_1 = read(src_fd, buf1, IO_BUF);
  if(rcount_1 != -1)
  {
    return NULL;
  }

  perror("r_func1");
  exit(-1);
}

/*
 * Thread entry point: write the chunk held in buf1 to the destination file.
 *
 * write_tmp points to an int holding the file descriptor to write to.
 * The first rcount_1 bytes of buf1 are the payload; they are written at
 * file offset woffset, and woffset is then advanced by rcount_1.  wcount_1
 * receives the number of bytes handed to pwrite().  A write error or a
 * write that makes no progress terminates the whole process.
 * Always returns NULL.
 */
void * w_func1(void *write_tmp)
{
  const int fd = *(int *)write_tmp;
  int length = 0;
  int done = 0;
  off_t pos;

  if(rcount_1 > 0)
  {
    /*
     * A full buffer is written as is.  A shorter chunk is rounded UP to the
     * next multiple of 512 so the transfer size stays sector-aligned; a
     * length that is already a multiple of 512 is left unchanged.
     */
    length = (rcount_1 == IO_BUF) ? rcount_1 : ((rcount_1 + 511) / 512) * 512;
  }

  /* pwrite() may transfer less than requested, so continue until done. */
  while(done < length)
  {
    ssize_t n = pwrite(fd, buf1 + done, (size_t)(length - done), (off_t)woffset + done);

    if(n == -1)
    {
      perror("w_func1");
      exit(-1);
    }

    if(n == 0)
    {
      fputs("w_func1: short write\n", stderr);
      exit(-1);
    }

    done += (int)n;
  }

  wcount_1 = done;

  /* Advance by the payload size only, never by the padding. */
  if(rcount_1 > 0)
  {
    woffset += rcount_1;
  }

  /*
   * Reposition the descriptor's own offset to woffset so that any plain
   * write() on this descriptor continues from the right place.
   */
  pos = lseek(fd, woffset, SEEK_SET);

  printf("wcount_1 : %d\n", wcount_1);
  printf("woffset : %lld\n", (long long)pos);

  return NULL;
}

/*
 * Thread entry point: read up to IO_BUF bytes into buf2.
 *
 * read_tmp points to an int holding the file descriptor to read from.
 * The byte count returned by read() is stored in rcount_2.  A read error
 * is reported with perror() and terminates the whole process.
 * Always returns NULL.
 */
void * r_func2(void *read_tmp)
{
  const int src_fd = *(int *)read_tmp;

  rcount_2 = read(src_fd, buf2, IO_BUF);
  if(rcount_2 != -1)
  {
    return NULL;
  }

  perror("r_func2");
  exit(-1);
}

/*
 * Thread entry point: write the chunk held in buf2 to the destination file.
 *
 * write_tmp points to an int holding the file descriptor to write to.
 * The first rcount_2 bytes of buf2 are the payload; they are written at
 * file offset woffset, and woffset is then advanced by rcount_2.  wcount_2
 * receives the number of bytes handed to pwrite().  A write error or a
 * write that makes no progress terminates the whole process.
 * Always returns NULL.
 */
void * w_func2(void *write_tmp)
{
  const int fd = *(int *)write_tmp;
  int length = 0;
  int done = 0;
  off_t pos;

  if(rcount_2 > 0)
  {
    /*
     * A full buffer is written as is.  A shorter chunk is rounded UP to the
     * next multiple of 512 so the transfer size stays sector-aligned; a
     * length that is already a multiple of 512 is left unchanged.
     */
    length = (rcount_2 == IO_BUF) ? rcount_2 : ((rcount_2 + 511) / 512) * 512;
  }

  /*
   * Always use pwrite() with an explicit offset instead of depending on
   * where the descriptor's file offset happens to be.  pwrite() may
   * transfer less than requested, so continue until done.
   */
  while(done < length)
  {
    ssize_t n = pwrite(fd, buf2 + done, (size_t)(length - done), (off_t)woffset + done);

    if(n == -1)
    {
      perror("w_func2");
      exit(-1);
    }

    if(n == 0)
    {
      fputs("w_func2: short write\n", stderr);
      exit(-1);
    }

    done += (int)n;
  }

  wcount_2 = done;

  /* Advance by the payload size only, never by the padding. */
  if(rcount_2 > 0)
  {
    woffset += rcount_2;
  }

  /* Keep the descriptor's own offset in step with woffset. */
  pos = lseek(fd, woffset, SEEK_SET);

  printf("wcount_2 : %d\n", wcount_2);
  printf("woffset : %lld\n", (long long)pos);

  return NULL;
}

/* Starts one reader thread and one writer thread, then waits for both. */
static void run_reader_writer(void *(*reader)(void *), int *src_fd,
                              void *(*writer)(void *), int *dst_fd)
{
  pthread_t reader_thread;
  pthread_t writer_thread;

  if(pthread_create(&reader_thread, NULL, reader, src_fd) != 0)
  {
    fputs("read_write: pthread_create failed\n", stderr);
    exit(-1);
  }

  if(pthread_create(&writer_thread, NULL, writer, dst_fd) != 0)
  {
    fputs("read_write: pthread_create failed\n", stderr);
    exit(-1);
  }

  pthread_join(reader_thread, NULL);
  pthread_join(writer_thread, NULL);
}

/*
 * Copy the file `from` to `target_file` using O_DIRECT I/O.
 *
 * While one buffer is being written to the destination, the next chunk is
 * read into the other buffer on a second thread; the two buffers swap roles
 * every half iteration.  After the data is copied the destination is
 * truncated to the source's size and given the source's access and
 * modification times.
 *
 * from        - path of the source file
 * target_file - path of the destination file (created with mode 0600 if it
 *               does not exist)
 *
 * Failures to open, read, stat or truncate terminate the process; a failure
 * to set the timestamps is reported but does not stop the copy.
 */
void read_write(const char *from, const char *target_file)
{
  struct stat stat_buf;
  /* utimensat is deliberately not used this time */
  struct utimbuf times;
  int ifrom;
  int ito;

  ifrom = open(from, O_RDONLY | O_DIRECT | O_NOATIME);
  if(ifrom == -1)
  {
    /* O_NOATIME is refused for files the caller does not own; retry without it. */
    ifrom = open(from, O_RDONLY | O_DIRECT);
  }
  if(ifrom == -1)
  {
    perror(from);
    exit(-1);
  }

  ito = open(target_file, O_RDWR | O_CREAT | O_DIRECT, S_IRUSR | S_IWUSR);
  if(ito == -1)
  {
    perror(target_file);
    exit(-1);
  }

  /* Prime the pipeline: the first chunk is read before any thread starts. */
  rcount_1 = read(ifrom, buf1, IO_BUF);
  if(rcount_1 == -1)
  {
    perror("read_write");
    exit(-1);
  }

  for(;;)
  {
    /* Read into buf2 while buf1 is written out. */
    run_reader_writer(r_func2, &ifrom, w_func1, &ito);

    if((rcount_1 == 0) || (rcount_2 == 0)) break;

    /* Read into buf1 while buf2 is written out. */
    run_reader_writer(r_func1, &ifrom, w_func2, &ito);

    if((rcount_1 == 0) || (rcount_2 == 0)) break;
  }

  if(fstat(ifrom, &stat_buf) == -1)
  {
    perror("fstat");
    exit(-1);
  }

  /* The final chunk is written padded; cut the file back to its real size. */
  if(ftruncate(ito, stat_buf.st_size) == -1)
  {
    perror("ftruncate");
    exit(-1);
  }

  times.actime  = stat_buf.st_atime;
  times.modtime = stat_buf.st_mtime;
  if(utime(target_file, &times) == -1)
  {
    /* The data is already copied; losing the timestamps is not fatal. */
    perror("utime");
  }

  /* Reset the shared state so another copy can start from scratch. */
  rcount_1 = -1;
  rcount_2 = -1;
  wcount_1 = -1;
  wcount_2 = -1;
  woffset = 0;

  close(ifrom);
  close(ito);
}

/*
 * Usage: cp_thread_dio <source-file> <destination-directory>
 *
 * Copies the regular file argv[1] into the existing directory argv[2],
 * keeping the source's base name.  Returns 0 after a copy and a non-zero
 * status when the arguments are not usable.
 */
int main(int argc, char *argv[])
{
  char current_dir[PATH_LEN];
  char from[PATH_LEN];
  char to[PATH_LEN];
  char *from_basename;
  char *target_file;
  void *aligned = NULL;

  /* Reject a wrong argument count, a non-regular source, or a non-directory destination. */
  if((argc != 3)
     || !g_file_test(argv[1], G_FILE_TEST_IS_REGULAR)
     || !g_file_test(argv[2], G_FILE_TEST_IS_DIR))
  {
    puts("引数が正しくありません");
    return EXIT_FAILURE;
  }

  if((realpath(argv[1], from) == NULL) || (realpath(argv[2], to) == NULL))
  {
    puts("絶対パスの作成に失敗しました");
    exit(-1);
  }

  /* The working directory is printed for information only. */
  if(getcwd(current_dir, sizeof(current_dir)) != NULL)
  {
    printf("cwd is : %s\n", current_dir);
  }
  else
  {
    perror("getcwd");
  }
  printf("from   : %s\n", from);
  printf("to     : %s\n", to);

  /* The I/O buffers are allocated on a 512-byte boundary. */
  if(posix_memalign(&aligned, 512, IO_BUF) != 0)
  {
    fputs("バッファの確保に失敗しました\n", stderr);
    exit(-1);
  }
  buf1 = aligned;

  if(posix_memalign(&aligned, 512, IO_BUF) != 0)
  {
    fputs("バッファの確保に失敗しました\n", stderr);
    exit(-1);
  }
  buf2 = aligned;

  /* Both paths are absolute after realpath(), so no chdir() is needed. */
  from_basename = g_path_get_basename(from);
  target_file = g_build_path(G_DIR_SEPARATOR_S, to, from_basename, NULL);

  read_write(from, target_file);

  /* Strings returned by GLib are owned by the caller. */
  g_free(target_file);
  g_free(from_basename);

  free(buf1);
  free(buf2);

  return 0;
}