Atlanta Custom Software Development 

 
   Search        Code/Page
 

User Login
Email

Password

 

Forgot the Password?
Services
» Web Development
» Maintenance
» Data Integration/BI
» Information Management
Programming
  Database
Automation
OS/Networking
Graphics
Links
Tools
» Regular Expr Tester
» Free Tools

Ternary QuickSort - A modification of QuickSort

Total Hit ( 2505)

Rate this article:     Poor     Excellent 

 Submit Your Question/Comment about this article

Rating


 


Click here to copy the following block
' Ternary QuickSort. See the summary of QuickSort for background before
' reading this one. Ternary QuickSort (also called MultiKey QuickSort) differs
' from the original QuickSort by examining keys one byte at a time (like
' RadixSort), and by handling keys in three categories -- less than pivot,
' equal to pivot, and more than pivot -- instead of two. (Separate handling
' of equal keys is why this sort is stable while ordinary QuickSort is not.) 
' As in QuickSort, the left and right sublists are processed recursively until
' they are short enough for InsertionSort. The equal-to-pivot sublist is also
' handled recursively, except that a pointer indicating the "depth" of the byte
' to be examined is advanced to the next position before the call. Since
' sublists handed off to InsertionSort consist of keys whose first bytes are
' identical up to DEPTH - 1, we use a special version of InsertionSort that
' compares keys only from DEPTH onward.
'
' Needs no extra arrays, but uses some stack space for recursion. In practice
' in VBA, I don't find this sort to be as fast as the original QuickSort,
' but results may differ in languages that offer faster access to bytes within
' strings. This algorithm is definitely oriented toward strings; see comments
' under MSD Radix Sort on possible adaptation to integers and longs.
'
' Reference: Jon Bentley and Robert Sedgewick, "Fast Algorithms for Sorting
' and Searching Strings", Proceedings of the 8th Annual ACM-SIAM Symposium on
' Discrete Algorithms, January 1997. See also http://www.cs.princeton.edu/~rs/
' strings/.
'
' Speed: TernaryQuickSort sorts 500,000 random strings in 39 sec; sorts 100186
' library call numbers in 14 sec; sorts 25479 dictionary words in 1.8 sec
' (random order), 1.9 sec (presorted) or 1.8 sec (reverse sorted). Timed in
' Excel 2001 on an 800 mhz PowerBook.
'
' Bottom line: a stable version of QuickSort good for strings,
' but not (at least in VBA) as fast as the original.

' Usage: 

Dim S1(L To R) As Strings
Dim B1(1 To nChars) As Byte
Dim P1(L To R) As Long

For I = L To R
  S1(I) = GetRandomString()
Next I

StrsToBytes S1, L, R, B1, P1    'a routine that stores the strings in 0-
                  ' terminated byte
              'arrays with P1() holding pointers to the first
              ' byte of
              'each string

TernaryQuickSort L, R, B1, P1

' CODE:

Sub TernaryQuickSort(L As Long, R As Long, B() As Byte, P() As Long)
  TernQuick L, R, B, P, 0
End Sub

Sub TernQuick(L As Long, R As Long, B() As Byte, P() As Long, _
  ByVal DEPTH As Integer)
  Dim TMP As Long
  Dim I As Long
  Dim J As Long
  Dim pMED As Long
  Dim Pivot As Integer
  Dim OuterL As Long
  Dim InnerL As Long
  Dim InnerR As Long
  Dim OuterR As Long
  Dim DIF As Long
  Dim N As Long
  Dim SwapN As Long
  Dim NLO As Long
  Dim NHI As Long
  Dim NEQ As Long

  N = 1 + R - L
  'Short sublists will be handled by lower overhead InsertionSort.
  If N > 10 Then
  'Get a pivot value from the median of three or nine keys.
    pMED = BGetMed(B, P, L, N, DEPTH)
  'Swap the median into the leftmost position.
    TMP = P(L)
    P(L) = P(pMED)
    P(pMED) = TMP
  'Our pivot will be the byte value at DEPTH.
    Pivot = B(P(L) + DEPTH)
  'Set up two pointers on the left and two on the right.
    OuterL = L
    InnerL = OuterL
    OuterR = R
    InnerR = OuterR
    Do
    'Look for a lefthand key/pointer to swap.
      Do While InnerL <= InnerR
     'DIF is the key's byte minus the Pivot byte.
        DIF = B(P(InnerL) + DEPTH) - Pivot
     'If the key's byte is greater, we've found a pointer to swap to the
     ' right side.
        If DIF > 0 Then Exit Do
     'If our byte is equal to the Pivot byte, we swap it to the extreme
     ' left end.
        If DIF = 0 Then
          TMP = P(OuterL)
          P(OuterL) = P(InnerL)
          P(InnerL) = TMP
          OuterL = OuterL + 1
        End If
     'If our byte is less than Pivot, we just scan over it.
        InnerL = InnerL + 1
      Loop
    'Now look for a righthand key/pointer to swap.
      Do While InnerL <= InnerR
        DIF = B(P(InnerR) + DEPTH) - Pivot
     'If the key's byte is less, we've found a pointer to swap to the left
     ' side.
        If DIF < 0 Then Exit Do
     'If our byte is equal to the Pivot byte, we swap it to the extreme
     ' right end.
        If DIF = 0 Then
          TMP = P(OuterR)
          P(OuterR) = P(InnerR)
          P(InnerR) = TMP
          OuterR = OuterR - 1
        End If
        InnerR = InnerR - 1
      Loop
    'If the inner pointers have crossed, we're done.
      If InnerL > InnerR Then Exit Do
    'Otherwise, we do the left/right swap we just set up.
      TMP = P(InnerL)
      P(InnerL) = P(InnerR)
      P(InnerR) = TMP
      InnerL = InnerL + 1
      InnerR = InnerR - 1
    Loop
  'We've arranged pointers to equal bytes on the far left and right,
  ' pointers to lower bytes
  'on the inner left, and pointers to higher bytes on the inner right. Now
  ' we will swap the
  'equals to the center, between the lowers and the highers.
    NLO = InnerL - OuterL
    NHI = OuterR - InnerR
    NEQ = N - (NLO + NHI)
    If OuterL - L < NLO Then SwapN = OuterL - L Else SwapN = NLO
    I = L
    J = InnerL - SwapN
  'Move the lefthand equals to center.
    Do While SwapN > 0
      TMP = P(I)
      P(I) = P(J)
      P(J) = TMP
      I = I + 1
      J = J + 1
      SwapN = SwapN - 1
    Loop
    If R - OuterR < NHI Then SwapN = R - OuterR Else SwapN = NHI
    I = InnerL
    J = R + 1 - SwapN
  'Move the righthand equals to center.
    Do While SwapN > 0
      TMP = P(I)
      P(I) = P(J)
      P(J) = TMP
      I = I + 1
      J = J + 1
      SwapN = SwapN - 1
    Loop
  'If there are more bytes, we increment DEPTH and recurse on the equals.
    If B(P(L+NLO) + DEPTH) <> 0 Then TernQuick L+NLO, L+NLO+NEQ-1, B, P, _
      DEPTH+1
  'Now we recurse on the lowers and highers; we do the shorter sublist first
  ' to hold stack
  'depth to log N.
    If NLO < NHI Then
      TernQuick L, L + NLO - 1, B, P, DEPTH
      TernQuick L + NLO + NEQ, L + NLO + NEQ + NHI - 1, B, P, DEPTH
    Else
      TernQuick L + NLO + NEQ, L + NLO + NEQ + NHI - 1, B, P, DEPTH
      TernQuick L, L + NLO - 1, B, P, DEPTH
    End If
  Else
  'A special version of InsertionSort that compares keys starting at depth
  ' (since the
  'sublists we hand off to it will have identical prefixes.
    DeepInsertS B, P, L, N, DEPTH
  End If
End Sub

Function BGetMed(B() As Byte, P() As Long, L As Long, N As Long, _
  DEPTH As Integer) As Long
  Dim D As Long
  Dim PL As Long
  Dim PM As Long
  Dim PN As Long

    PL = L
    PN = L + N - 1
    PM = (PL + PN) \ 2
    If N > 30 Then
      D = N \ 8
      PL = BMed3(B, P, PL, PL + D, PL + 2 * D, DEPTH)
      PM = BMed3(B, P, PM - D, PM, PM + D, DEPTH)
      PL = BMed3(B, P, PN - 2 * D, PN - D, PN, DEPTH)
    End If
    BGetMed = BMed3(B, P, PL, PM, PN, DEPTH)
End Function

Function BMed3(B() As Byte, P() As Long, I As Long, J As Long, K As Long, _
  DEPTH As Integer) As Long
  Dim CI As Byte
  Dim CJ As Byte
  Dim CK As Byte
  
  CI = B(P(I) + DEPTH)
  CJ = B(P(J) + DEPTH)
  CK = B(P(K) + DEPTH)
  If (CI <= CJ And CJ <= CK) Or (CI >= CJ And CJ >= CK) Then
    BMed3 = J
  ElseIf (CJ <= CI And CI <= CK) Or (CJ >= CI And CI >= CK) Then
    BMed3 = I
  ElseIf (CI <= CK And CK <= CJ) Or (CI >= CK And CK >= CJ) Then
    BMed3 = K
  End If
End Function

Sub DeepInsertS(B() As Byte, P() As Long, L As Long, N As Long, D As Integer)
  Dim LP As Long
  Dim RP As Long
  Dim TMP As Long
  Dim I As Long
  Dim J As Long
  
  For RP = L + 1 To L + N - 1
    TMP = P(RP)
    For LP = RP To L + 1 Step -1
      I = TMP + D
      J = P(LP - 1) + D
      Do While B(I) = B(J)
        If B(I) = 0 Or B(J) = 0 Then Exit Do
        I = I + 1
        J = J + 1
      Loop
      If CInt(B(I)) - CInt(B(J)) < 0 Then P(LP) = P(LP - 1) Else Exit For
    Next LP
    P(LP) = TMP
  Next RP
End Sub

Sub Strs2Bytes(A() As String, L As Long, R As Long, B() As Byte, P() As Long)
  Dim I As Long
  Dim nPtrs As Long
  Dim nBytes As Long
  Dim DEPTH As Integer
  
  nBytes = 0
  nPtrs = 0
  For I = L To R
    nBytes = nBytes + Strings.Len(A(I)) + 1
    nPtrs = nPtrs + 1
  Next I
  ReDim B(1 To nBytes)
  ReDim P(1 To nPtrs)
  
  nPtrs = 1
  nBytes = 1
  For I = L To R
    P(nPtrs) = nBytes
    For DEPTH = 1 To Strings.Len(A(I))
      B(nBytes) = Asc(Strings.MID(A(I), DEPTH, 1))
      nBytes = nBytes + 1
    Next DEPTH
    B(nBytes) = 0
    nBytes = nBytes + 1
    nPtrs = nPtrs + 1
  Next I
End Sub


Submitted By : Nayan Patel  (Member Since : 5/26/2004 12:23:06 PM)

Job Description : He is the moderator of this site and currently working as an independent consultant. He works with VB.net/ASP.net, SQL Server and other MS technologies. He is MCSD.net, MCDBA and MCSE. In his free time he likes to watch funny movies and doing oil painting.
View all (893) submissions by this author  (Birth Date : 7/14/1981 )


Home   |  Comment   |  Contact Us   |  Privacy Policy   |  Terms & Conditions   |  BlogsZappySys

© 2008 BinaryWorld LLC. All rights reserved.