
December 21, 2006

Fast Sorting with Regular Sampling

Parallel Sorting by Regular Sampling (PSRS) is a parallel sorting algorithm in which each process draws evenly spaced samples from its locally sorted data, and those samples are used to choose the pivots around which all values are partitioned and sorted. Its advantages include good load balance and the fact that the number of processes does not have to be a power of two.
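
To make the sampling concrete (my own example, using the index scheme from the code below): with p = 3 processes and, say, 9 elements per process, each process takes the elements at positions 0, 3, and 6 of its locally sorted list as its samples. Process 0 gathers the resulting nine samples, sorts them, and keeps the ones at indices 3 and 6 (the 4th and 7th smallest) as the p - 1 = 2 pivots, which every process then uses to split its local list into 3 partitions.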

While learning MPI, I tried implementing it in C with MPI. Each of the p nodes generates n/p random floats between 0 and 1, and the lists are then sorted in parallel. It is not particularly well-written code, but I am posting it here in case it is useful as a reference.
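
For reference, on a typical MPI installation the program can be built and run along these lines (the source file name psrs.c and the process count are my own placeholders; PRINT_MSG and OUTPUT_FILE are the optional compile flags described in the header comment):

mpicc -O2 -DPRINT_MSG -o psrs psrs.c
mpiexec -n 4 ./psrs 1000000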

/**
 * PSRS implementation using MPI.
 *
 * Date: 5th of December, 2006
 * Author: Jay
 *
 * Compile options:
 *     PRINT_MSG: print some messages to stdout.
 *     OUTPUT_FILE: write 'glistNN.txt' and 'slistNN.txt', where NN is
 *                  the two-digit process rank. 'glistNN' contains the
 *                  sorted numbers generated by each node; 'slistNN' is
 *                  the final result held by each node.
 */
#include <stdlib.h>
#include <stdio.h>
#include <mpi.h>
#include <limits.h>
#include <time.h>
#include <stddef.h>
#include <string.h>

/* Upper bound of generated floating point */
#define UPPER_BOUND 1.0

/* float comparison function */
int float_comp(const void* aa, const void* bb)
{
    const float* a = aa;
    const float* b = bb;
    if (*a == *b) return 0;
    if (*a < *b) return -1;
    return 1;
}

/* Generates a random sequence of length size into array A */
void generate_random_sequence(float* A, size_t size)
{
    size_t i;
    for (i=0; i<size; i++)
        A[i] = (float)rand()/RAND_MAX;
}

/* Returns a pointer to the lower bound, i.e. the first
 * position at which val could be inserted while keeping
 * the array sorted. */
float* lower_bound(float* first, float* last, float val)
{
    ptrdiff_t len = last - first;
    while (len > 0)
    {
        ptrdiff_t half = len / 2;
        float* middle = first + half;
        if (*middle < val)
        {
            first = middle + 1;
            len = len - half - 1;
        }
        else
            len = half;
    }
    return first;
}

/* Writes each element of the float array to the file */
void output(const char* filename, float* A, size_t size)
{
    FILE* f = fopen(filename, "w");
    int i;
    for (i=0; i<size; i++)
    {
        fprintf(f, "%1.5f\n", A[i]);
    }
    fclose(f);
}

/*
 * First command-line argument: n
 */
int main(int argc, char* argv[])
{
    int n_number = 1000000;
    if (argc > 1) n_number = atoi(argv[1]);
    int id, p;

    MPI_Init(&argc, &argv);

    MPI_Barrier(MPI_COMM_WORLD);
    double elapsed_time = -MPI_Wtime();

    MPI_Comm_rank(MPI_COMM_WORLD, &id);
    MPI_Comm_size(MPI_COMM_WORLD, &p);

    /* Some nodes should control 1 more
     * element if n % p != 0. */
    float A[(n_number+(p-1))/p];
    float* single_list = A;
    int n_control = n_number/p;
    int n_sorted = n_control;
    if (n_number%p > id) n_control++;
    int i;

#ifdef PRINT_MSG
    fprintf(stdout, "Process %d generates %d of %d elements.\n",
            id, n_control, n_number);
    fflush(stdout);
#endif

    /* For unique random numbers per process, add (id*1000) to the seed */
    srand( time(NULL) + id * 1000);
    generate_random_sequence(A, n_control);

    /* Phase 1 */
    /* Each process quicksorts its own list and each one picks samples */
    qsort(A, n_control, sizeof(float), float_comp);

    if (p > 1)
    {

        float samples[p];
        for (i=0; i<p; i++)
            samples[i] = A[i*n_control/p];

        /* Phase 2 */
        /* Node 0 gathers samples, sorts them, and picks pivots. */
        float all_samples[p*p];
        MPI_Gather(samples, p, MPI_FLOAT, all_samples, p,
                MPI_FLOAT, 0, MPI_COMM_WORLD);
        float pivots[p-1];
        if (!id)
        {
            qsort(all_samples, p*p, sizeof(float), float_comp);

            for (i=0; i<p-1; i++)
                pivots[i] = all_samples[(i+1)*p+p/2-1];
        }
        /* Node 0 broadcasts pivots and each process
         * partitions its own list. */
        MPI_Bcast(pivots, p-1, MPI_FLOAT, 0, MPI_COMM_WORLD);
        int send_cnts[p], send_disp[p];
        send_disp[0] = 0;
        for (i=1; i<p; i++)
        {
            send_disp[i] =
                lower_bound(A, A+n_control, pivots[i-1]) - A;
            send_cnts[i-1] = send_disp[i] - send_disp[i-1];
        }
        send_cnts[p-1] = n_control - send_disp[p-1];

        /* Phase 3 */
        /* First, exchange the counts of elements that
         * each process is going to send. */
        int recv_cnts[p], recv_disp[p+1];
        MPI_Alltoall(send_cnts, 1, MPI_INT, recv_cnts, 1,
                MPI_INT, MPI_COMM_WORLD);
        recv_disp[0] = 0;
        for (i=1; i<p; i++)
            recv_disp[i] = recv_disp[i-1] + recv_cnts[i-1];
        recv_disp[p] = recv_disp[p-1]+recv_cnts[p-1];
        float partitions[recv_disp[p]];
        /* Exchanges elements to appropriate nodes. */
        MPI_Alltoallv(A, send_cnts, send_disp, MPI_FLOAT, partitions,
                recv_cnts, recv_disp, MPI_FLOAT, MPI_COMM_WORLD);

        /* Phase 4 */
        /* Each node merges its own partitions into a single list. */
        int j;
        int merge_disp[p];
        n_sorted = recv_disp[p];
        single_list = malloc(n_sorted*sizeof(float));
        memcpy(merge_disp, recv_disp, p*sizeof(int));
        for (i=0; i<n_sorted; i++)
        {
            float min = UPPER_BOUND;
            int min_pos = 0;
            for (j=0; j<p; j++)
                if (merge_disp[j] < recv_disp[j+1]
                        && min > partitions[merge_disp[j]])
                {
                    min = partitions[merge_disp[j]];
                    min_pos = j;
                }
            single_list[i] = min;
            merge_disp[min_pos]++;
        }

    }
    /* Synchronizes for checking maximum elapsed time among nodes. */
    MPI_Barrier(MPI_COMM_WORLD);
    elapsed_time += MPI_Wtime();

#ifdef PRINT_MSG
    fprintf(stdout, "Process %d now has sorted the list that contains \
%d of %d elements.\n", id, n_sorted, n_number);
    fflush(stdout);
#endif

    if (!id)
        printf("Elapsed Time with %d processes: %10.6f\n",
                p, elapsed_time);

    /* Output (elapsed_time doesn't count for file output!) */
#ifdef OUTPUT_FILE
    char filename[100];
    strcpy(filename, "glistxx.txt");
    filename[5] = '0' + id / 10;
    filename[6] = '0' + id % 10;
    output(filename, A, n_control);
    filename[0] = 's';
    output(filename, single_list, n_sorted);
#endif
    if (single_list != A) free(single_list);
    MPI_Finalize();

    return 0;
}