Slow and Steady!!!: C언어

고른 표본 추출을 통한 병렬 정렬(Parallel Sorting by Regular Sampling: PSRS)은 p개의 프로세스에서 골고루 표본을 추출하고, 그 표본 가운데 고른 값들을 주축(pivot)으로 삼아 값들을 정렬하는 병렬 정렬 알고리즘의 일종입니다. 부하 균형이 잘 잡히고, 프로세스의 수가 2의 거듭제곱 꼴이 아니어도 된다는 등의 장점이 있습니다.

MPI를 배우면서 이것을 한번 C+MPI로 구현해 보았습니다. p개의 노드가 각각 n/p개 정도의 [0, 1] 범위 float형 난수를 발생시킨 다음에 전체를 정렬하는 과정입니다. 잘 작성된 코드는 아니지만, 참고할 수 있는 코드일 수도 있어서 실어 봅니다.

           10  11  12  13  14  15  16  17  18  19  20  21   22  23  24   25  26  27 {  28   29   30   31   32   33 }  34   35  36  37 {  38   39   40   41 }  42   43  44  45  46  47  48 {  49   50   51     {  52   53   54   55   56   57   58   59   60   61     }  62   63 }  64   65  66  67 {  68   69   70   71     {  72   73     }  74   75 }  76   77  78  79  80  81 {  82   83   84   85   86   87   88   89   90   91   92   93   94   95  96   97   98   99  100  101  102  103 104  105  106  107 108  109  110  111  112  113  114  115  116  117  118     { 119  120  121  122  123  124  125  126  127  128  129  130  131  132  133  134  135  136  137  138 139  140  141  142  143  144  145  146  147  148  149  150  151  152 153  154  155  156  157  158  159  160  161  162  163  164  165  166  167  168  169  170  171  172  173  174  175  176  177  178  179  180  181  182  183  184  185  186  187     } 188  189  190  191  192 193  194 195  196 197  198  199  200  201  202  203 204  205  206  207  208  209  210  211 212  213  214  215  216 } 217

1 /** 2 * PSRS implementation using MPI. 3 * 4 * Date: 5th of December, 2006 5 * Author: Jay 6 * 7 * Compile options: 8 * PRINT_MSG: print some messages to stdout. 9 * OUTPUT_FILE: write 'glist_nn.txt', and 'slist_nn.txt'. * 'glist_nn' contains sorted generated numbers * of each node. 'slist_nn' is final result * of each node. */ #include <stdlib.h> #include <stdio.h> #include <mpi.h> #include <limits.h> #include <time.h> #include <stddef.h> #include <string.h> /* Upper bound of generated floating point */ #define UPPER_BOUND 1.0 /* float comparision function */ int float_comp(const void* aa, const void* bb) const float* a = aa; const float* b = bb; if (*a == *b) return 0; if (*a < *b) return -1; return 1; /* Generates random sequence into array A with size */ void generate_random_sequence(float* A, size_t size) size_t i; for (i=0; i<size; i++) A[i] = (float)rand()/RAND_MAX; /* Returns pointer to the lower_bound, * which means the lowest position * that the element can be inserted * with sorted order. */ float* lower_bound(float* first, float* last, float val) ptrdiff_t len = last - first; while (len > 0) ptrdiff_t half = len / 2; float* middle = first + half; if (*middle < val) { first = middle + 1; len = len - half - 1; } else len = half; return first; /* Writes each elements in float array to the file */ void output(const char* filename, float* A, size_t size) FILE* f = fopen(filename, "w"); int i; for (i=0; i<size; i++) fprintf(f, "%1.5f\n", A[i]); fclose(f); /* * First command-line argument: n */ int main(int argc, char* argv[]) int n_number = 1000000; if (argc > 1) n_number = atoi(argv[1]); int id, p; MPI_Init(&argc, &argv); MPI_Barrier(MPI_COMM_WORLD); double elapsed_time = -MPI_Wtime(); MPI_Comm_rank(MPI_COMM_WORLD, &id); MPI_Comm_size(MPI_COMM_WORLD, &p); /* Some nodes should control 1 more * element if n % p != 0. 
*/ float A[(n_number+(p-1))/p]; float* single_list = A; int n_control = n_number/p; int n_sorted = n_control; if (n_number%p > id) n_control++; int i; #ifdef PRINT_MSG fprintf(stdout, "Process %d generates %d of %d elements.\n", id, n_control, n_number); fflush(stdout); #endif /* For unique random number, add (id*1000) */ srand( time(NULL) + id * 1000); generate_random_sequence(A, n_control); /* Phase 1 */ /* Each process quicksort their own list and each one picks samples */ qsort(A, n_control, sizeof(float), float_comp); if (p > 1) float samples[p]; for (i=0; i<p; i++) samples[i] = A[i*n_control/p]; /* Phase 2 */ /* Node 0 gathers samples, sorts them, and picks pivots. */ float all_samples[p*p]; MPI_Gather(samples, p, MPI_FLOAT, all_samples, p, MPI_FLOAT, 0, MPI_COMM_WORLD); float pivots[p-1]; if (!id) { qsort(all_samples, p*p, sizeof(float), float_comp); for (i=0; i<p-1; i++) pivots[i] = all_samples[(i+1)*p+p/2-1]; } /* Node 0 broadcasts pivots and each process * partitions its own list. */ MPI_Bcast(pivots, p-1, MPI_FLOAT, 0, MPI_COMM_WORLD); int send_cnts[p], send_disp[p]; send_disp[0] = 0; for (i=1; i<p; i++) { send_disp[i] = (float*)(lower_bound(A, A+n_control, pivots[i-1]))-A; send_cnts[i-1] = send_disp[i] - send_disp[i-1]; } send_cnts[p-1] = n_control - send_disp[p-1]; /* Phase 3 */ /* First, exchanges the number of elements that * each one is going to exchange. */ int recv_cnts[p], recv_disp[p+1]; MPI_Alltoall(send_cnts, 1, MPI_FLOAT, recv_cnts, 1, MPI_FLOAT, MPI_COMM_WORLD); recv_disp[0] = 0; for (i=1; i<p; i++) recv_disp[i] = recv_disp[i-1] + recv_cnts[i-1]; recv_disp[p] = recv_disp[p-1]+recv_cnts[p-1]; float partitions[recv_disp[p]]; /* Exchanges elements to appropriate nodes. */ MPI_Alltoallv(A, send_cnts, send_disp, MPI_FLOAT, partitions, recv_cnts, recv_disp, MPI_FLOAT, MPI_COMM_WORLD); /* Phase 4 */ /* Each node merges its own partitions into a single list. 
*/ int j; int merge_disp[p]; n_sorted = recv_disp[p]; single_list = malloc(n_sorted*sizeof(float)); memcpy(merge_disp, recv_disp, p*sizeof(int)); for (i=0; i<n_sorted; i++) { float min = UPPER_BOUND; int min_pos = 0; for (j=0; j<p; j++) if (merge_disp[j] < recv_disp[j+1] && min > partitions[merge_disp[j]]) { min = partitions[merge_disp[j]]; min_pos = j; } single_list[i] = min; merge_disp[min_pos]++; } /* Synchronizes for checking maximum elapsed time among nodes. */ MPI_Barrier(MPI_COMM_WORLD); elapsed_time += MPI_Wtime(); #ifdef PRINT_MSG fprintf(stdout, "Process %d now has sorted the list that contains \ %d of %d elements.\n", id, n_sorted, n_number); fflush(stdout); #endif if (!id) printf("Elapsed Time with %d processes: %10.6f\n", p, elapsed_time); /* Output (elapsed_time doesn't count for file output!) */ #ifdef OUTPUT_FILE char filename[100]; strcpy(filename, "glistxx.txt"); filename[5] = '0' + id / 10; filename[6] = '0' + id % 10; output(filename, A, n_control); filename[0] = 's'; output(filename, single_list, n_sorted); #endif if (single_list != A) free(single_list); MPI_Finalize(); return 0;

기사의 여행은 체스판 위의 임의의 위치에서 기사가 출발하여 각 위치를 오직 한 번만 방문하면서 모든 위치를 방문하는 순서를 구하는 문제입니다. 오일러 등의 많은 수학자들이 이 문제를 다루었으며, 다양한 해법과 변형된 문제들이 있습니다. 가장 일반적인 해법은 되추적을 이용한 방법입니다.

19세기의 H. C. Warnsdorff는 기사의 여행 문제를 푸는 실용적인 방법을 제시하였습니다. 기사가 움직이면서 어느 곳으로도 움직일 수 없는 막다른 곳에 다다르지 않게 하는 것이 목적입니다. 막다른 곳에 다다르지 않게 하기 위하여 Warnsdorff가 제시한 규칙은 현재 기사가 한 번에 갈 수 있는 곳 중에서 다음 번 수에 갈 수 있는 곳이 가장 적은 곳으로 간다는 규칙입니다. 이 방법은 휴리스틱한 방법입니다만, 8 x 8의 체스판 공간에서 기사의 여행 문제를 잘 풀어 줍니다. 체스판의 공간이 넓어지면 제대로 풀리지 않는 경우가 생길 수 있습니다.

Warnsdorff의 규칙을 이용하여 기사의 여행 문제를 푸는 간단한 프로그램을 작성해 보았습니다. C언어로 작성되어 있고 시작행과 시작열의 위치를 기본 입력에서 읽어서 기본 출력으로 해를 출력해 줍니다.

  1 /*
  2  * Knight's Tour.
  3  *
  4  * Author: Jay
  5  * Date: 20th of December, 2006
  6  */
  7 #include <stdio.h>
  8 
  9 /* definitions */
 /* Board dimensions and the number of distinct knight moves. */
 #define ROW_SIZE 8
 #define COL_SIZE 8
 #define NUM_WAYS 8
 /* A board stores one int per square: -1 while unvisited, otherwise
  * the index of the move that reached the square. */
 typedef int board_t[ROW_SIZE][COL_SIZE];
 /* Row and column offsets of the knight moves; dr[i] pairs with dc[i].
  * The tables are only ever read, hence const. */
 const int dr[NUM_WAYS] = {-2, -1, 1, 2, 2, 1, -1, -2};
 const int dc[NUM_WAYS] = {1, 2, 2, 1, -1, -2, -2, -1};
 16 
 17 /**
 18  * Set every element to -1
 19  */
 20 void initialize_board(board_t board)
 21 {
 22     int i, j;
 23     for (i=0; i<ROW_SIZE; i++)
 24         for (j=0; j<COL_SIZE; j++)
 25             board[i][j] = -1;
 26 }
 27 
 28 /**
 29  * Print the board out.
 30  */
 31 void print_board(board_t board)
 32 {
 33     int i, j;
 34     for (i=0; i<ROW_SIZE; i++)
 35     {
 36         for (j=0; j<COL_SIZE; j++)
 37             printf("%d\t", board[i][j]);
 38         printf("\n");
 39     }
 40 }
 41 
 42 /**
 43  * Check if (r,c) is inside board.
 44  * @return true if (r,c) is inside board, false otherwise.
 45  */
 46 int is_inside_board(int r, int c)
 47 {
 48     return r >= 0 && r < ROW_SIZE && c >= 0 && c < COL_SIZE;
 49 }
 50 
 51 /**
 52  * Check if (r,c) is available in board.
 53  * @return true if (r,c) is available, that is has value -1,
 54  *         false otherwise.
 55  */
 56 int is_available(board_t board, int r, int c)
 57 {
 58     return is_inside_board(r, c) && board[r][c] == -1;
 59 }
 60 
 61 /**
 62  * @return number of next moves of (r,c) in board.
 63  */
 64 int num_next_moves(board_t board, int r, int c)
 65 {
 66     int i, result=0;
 67     for (i=0; i<NUM_WAYS; i++)
 68         if (is_available(board, r+dr[i], c+dc[i]))
 69             result++;
 70     return result;
 71 }
 72 
 73 /**
 74  * Get next way id from (r,c) in board.
 75  * Next way is the way whose destination has minimal number of next moves.
 76  * @return next way id, which is in [0, NUM_WAYS).
 77  */
 78 int next_way_of(board_t board, int r, int c)
 79 {
 80     int i, min = NUM_WAYS, result=0;
 81     for (i=0; i<NUM_WAYS; i++)
 82         if (is_available(board, r+dr[i], c+dc[i])
 83                 && num_next_moves(board, r+dr[i], c+dc[i]) < min)
 84         {
 85             min = num_next_moves(board, r+dr[i], c+dc[i]);
 86             result = i;
 87         }
 88     return result;
 89 }
 90 
 91 /**
 92  * Get r, c from user and solve knight tour problem.
 93  * Print result out.
 94  * @return 0 for successful moves, 1 otherwise.
 95  */
 96 int main()
 97 {
 98     int r, c, move, next_way;
 99     board_t board;
100 
101     initialize_board(board);
102     while (1)
103     {
104         printf("Input start position r c: ");
105         scanf("%d %d", &r, &c);
106         fflush(stdin);
107         if (is_inside_board(r, c)) break;
108         printf("Please put them again.\n");
109     }
110     board[r][c] = 0;
111 
112     for (move=1; move<ROW_SIZE*COL_SIZE; move++)
113     {
114         if (num_next_moves(board, r, c) == 0)
115         {
116             printf("Failed.\n");
117             print_board(board);
118             return 1;
119         }
120         next_way = next_way_of(board, r, c);
121         r = r + dr[next_way];
122         c = c + dc[next_way];
123         board[r][c] = move;
124     }
125     print_board(board);
126 
127     return 0;
128 }

참고할 수 있는 URL:
Warnsdorff's rule - 영문 페이지

Slow and Steady!!!

2006년 12월 21일

고른 표본 추출을 통한 빠른 정렬

2006년 12월 20일

기사의 여행

블로그 보관함

프로필

태그