1
votes

I already have an iterative version merge sort algorithm to sort linked list.

Following version work well in general case.

Roughly describe my algorithm:

I use button up, first iterate I merge 2 nodes and 2 nodes into 4 nodes which is sorted, and so on...

typedef struct ELE {
    char *value;
    struct ELE *next;
} list_ele_t;

/* Queue structure */
typedef struct {
    list_ele_t *head; /* Linked list of elements */
    list_ele_t *tail; /* Linked list of elements */
    int size;
} queue_t;

void q_sort(queue_t *q)
{
    if (!q || !q->head || q->size == 1)
        return;

    int block_size = 1, n = q->size, i, alen, blen;
    list_ele_t virtual_head;
    list_ele_t *last = NULL, *it = NULL, *l1 = NULL, *l2 = NULL, *tmp = NULL;
    // list_ele_t *l1head = NULL, *l1tail = NULL, *l2tail = NULL;
    virtual_head.next = q->head;
    while (block_size < n) {
        int iter = 0;
        last = &virtual_head;
        it = virtual_head.next;
        while (iter < n) {
            // decide each iterate time's block size a and b
            alen = min(n - iter, block_size);
            // avoid odd block
            blen = min(n - iter - alen, block_size);

            l1 = it;
            l1head = l1;
            // if left block is odd, just skip
            if (blen != 0) {
                // seperate one list in to l1 and l2
                for (i = 0; i < alen - 1; ++i)
                    it = it->next;
                l1tail = it;
                l2 = it->next;
                it->next = NULL;
                it = l2;
                for (i = 0; i < blen - 1; ++i)
                    it = it->next;
                l2tail = it;
                tmp = it->next;
                it->next = NULL;
                it = tmp;
            }


            while (l1 || l2) {
                if (l2 == NULL ||
                    (l1 != NULL && strcmp(l1->value, l2->value) < 0)) {
                    // if l2 doesn't contain any node, just append l1 to
                    // merge list or if value of l1 is smaller
                    last->next = l1;
                    last = last->next;
                    l1 = l1->next;
                } else {
                    // if l1 doesn't contain any node, just append l2 to
                    // merge list or if value of l2 is smaller
                    last->next = l2;
                    last = last->next;
                    l2 = l2->next;
                }
            }
            last->next = NULL;
            iter += alen + blen;
        }
        block_size <<= 1;
    }

    q->head = virtual_head.next;
    list_ele_t *nd = q->head;
    while (nd->next) {
        nd = nd->next;
    }
    q->tail = nd;
} 

When this algorithm encounter very long linked list will be very slow, but this kind of testbench have some specific format like have several repetitive section. e.g. AAAAABBBBBBB

As a result, I add some code into origin version. I check whether the member of list 1 (l1) and list 2 (l2) are same. If so, I skip merge step, just append l2 to l1

Following is new version:

void q_sort(queue_t *q)
{
    long long cnt = 0;
    if (!q || !q->head || q->size == 1)
        return;
    int block_size = 1, n = q->size, i, alen, blen;
    list_ele_t virtual_head;
    list_ele_t *last = NULL, *it = NULL, *l1 = NULL, *l2 = NULL, *tmp = NULL;
    list_ele_t *l1head = NULL, *l1tail = NULL, *l2tail = NULL;
    virtual_head.next = q->head;
    while (block_size < n) {
        int iter = 0;
        last = &virtual_head;
        it = virtual_head.next;
        while (iter < n) {
            // decide each iterate time's block size a and b
            alen = min(n - iter, block_size);
            // avoid odd block
            blen = min(n - iter - alen, block_size);
            // printf("%d %d block size: %d\n",alen,blen,block_size);

            l1 = it;
            l1head = l1;
            // if left block is odd, just skip
            if (blen != 0) {
                // seperate one list in to l1 and l2
                for (i = 0; i < alen - 1; ++i)
                    it = it->next;
                l1tail = it;
                l2 = it->next;
                it->next = NULL;
                it = l2;
                for (i = 0; i < blen - 1; ++i)
                    it = it->next;
                l2tail = it;
                tmp = it->next;
                it->next = NULL;
                it = tmp;
            }

            if (l1head && l2tail && strcmp(l1head->value, l2tail->value) == 0
                && (l1tail && l2 && strcmp(l1tail->value,l2->value) == 0)) {
                iter += alen + blen;
                last->next = l1;
                l1tail->next = l2;
                last = l2tail;
                l2tail->next = NULL;
            } else {
                while (l1 || l2) {
                    if (l2 == NULL ||
                        (l1 != NULL && strcmp(l1->value, l2->value) < 0)) {
                        // if l2 doesn't contain any node, just append l1 to
                        // merge list or if value of l1 is smaller
                        last->next = l1;
                        last = last->next;
                        l1 = l1->next;
                    } else {
                        // if l1 doesn't contain any node, just append l2 to
                        // merge list or if value of l2 is smaller
                        last->next = l2;
                        last = last->next;
                        l2 = l2->next;
                    }
                }
                last->next = NULL;
                iter += alen + blen;
            }
        }
        block_size <<= 1;
    }

    q->head = virtual_head.next;
    list_ele_t *nd = q->head;
    while (nd->next) {
        nd = nd->next;
    }
    q->tail = nd;
}

Concept is simple but my addition seems wrong. Error message told me maybe there exist infinite loop.

First, I wonder are there any mistake I make when I add this section? how to fix it?

if (l1head && l2tail && strcmp(l1head->value, l2tail->value) == 0
            && (l1tail && l2 && strcmp(l1tail->value,l2->value) == 0)) {
            iter += alen + blen;
            last->next = l1;
            l1tail->next = l2;
            last = l2tail;
            l2tail->next = NULL;
        }

Second, are there better way to accelerate this specific format linked list? ( e.g. C->C->C->C->B->B.....B)

Thanks all of yours advice!

1
Working with lists is always going to be iterative one way or another. Other types of containers (e.g. arrays) could easily be done by divide and conquer, where each "divide" could be done in parallel (e.g. through threads). While it's somewhat possible with lists, you still need to iterate over the full list to create the sub-lists. And merging the sub-lists still needs to be done in an iterative way. (And I count recursion as a special-case of iteration.)Some programmer dude
Singly Linked List of Integers (mergesort) (note: if greater than 100,000 nodes, you need to switch to a Bottom-up mergesort)David C. Rankin
Using a small (25 to 32) array of pointers to nodes, where array[i] points to a list with 2^i nodes, is normally how bottom up merge sort is implemented for linked lists. Nodes are merged into the array one at a time, then the array merged to form a single sorted list. Take a look at the wiki example .rcgldr

1 Answers

0
votes

Example code that uses a small (32) array of "lists", where array[i] is a list with 2^i nodes (except last entry is unlimited size). This is probably the fastest way to sort a list using a list. It's usually faster to copy the list to an array, sort the array, and create a new sorted list from the array. The input parameter is a pointer to the first node of a list, and the returned value is a pointer to the first node of the sorted list. After sorting a list, the calling code will need to scan the sorted list to set the tail pointer in the list structure. The comparison in merge uses "src2 < src1" to follow the conventions used in C++ template sorts.

typedef struct NODE_{
struct NODE_ * next;
uint64_t data;
}NODE;

NODE * MergeLists(NODE *, NODE *);      /* prototype */

#define NUMLISTS 32                     /* number of lists */

NODE * SortList(NODE *pList)
{
NODE * aList[NUMLISTS];                 /* array of lists */
NODE * pNode;
NODE * pNext;
size_t i;
    if(pList == NULL)                   /* check for empty list */
        return NULL;
    for(i = 0; i < NUMLISTS; i++)       /* zero array */
        aList[i] = NULL;
    pNode = pList;                      /* merge nodes into array */
    while(pNode != NULL){
        pNext = pNode->next;
        pNode->next = NULL;
        for(i = 0; (i < NUMLISTS) && (aList[i] != NULL); i++){
            pNode = MergeLists(aList[i], pNode);
            aList[i] = NULL;
        }
        if(i == NUMLISTS)
            i--;
        aList[i] = pNode;
        pNode = pNext;
    }
    pNode = NULL;                       /* merge array into one list */
    for(i = 0; i < NUMLISTS; i++)
        pNode = MergeLists(aList[i], pNode);
    return pNode;
}

NODE * MergeLists(NODE *pSrc1, NODE *pSrc2)
{
NODE *pDst = NULL;                      /* destination head ptr */
NODE **ppDst = &pDst;                   /* ptr to head or prev->next */
    if(pSrc1 == NULL)
        return pSrc2;
    if(pSrc2 == NULL)
        return pSrc1;
    while(1){
        if(pSrc2->data < pSrc1->data){  /* if src2 < src1 */
            *ppDst = pSrc2;
            pSrc2 = *(ppDst = &pSrc2->next);
            if(pSrc2 == NULL){
                *ppDst = pSrc1;
                break;
            }
        } else {                        /* src1 <= src2 */
            *ppDst = pSrc1;
            pSrc1 = *(ppDst = &pSrc1->next);
            if(pSrc1 == NULL){
                *ppDst = pSrc2;
                break;
            }
        }
    }
    return pDst;
}