Burstsort
Burstsort e suas variantes são algoritmos cache-eficientes para a ordenação de strings, e são mais rápidos que o quicksort e o radix sort para grandes conjuntos de dados.
Burstsort | |
---|---|
classe | Algoritmo de ordenação |
estrutura de dados | Array, Listas ligadas |
complexidade pior caso | |
Algoritmos | |
Os algoritmos Burstsort usam tentativas para armazenar prefixos de strings, com arrays crescentes de ponteiros como nodos final contendo sufixos ordenados, únicos (referidos como buckets (baldes)). Algumas variantes copiar as caudas das strings nos buckets. A medida em que os buckets crescem além de um limite pré-determinado, os buckets são "estourados" (burst), dando ao algoritmo o seu nome. Uma variante mais recente, usa um bucket com menor índice de sub-buckets para reduzir o uso de memória.
Implementações
editarC++
editarusing System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace burstsort
{
class Program
{
static void Main(string[] args)
{
// we have to give an array of string to get sorted
string[] s = { "pole", "Aisha", "Try", "games", "Team" };
BurstSort b = new BurstSort();
b.insert(s);
b.print();
}
}
/*
* Create an instance of BurstSort
*/
class BurstSort
{
/** Maximum number of elements in bucket */
static int threshold = 32;
/** Size of the alphabet that is supported. */
static int alphbet = 127;
/** Initial size for new buckets. */
static int bucket_start_size = 4;
/** The bucket growth factor */
static int bucket_growth_factor = 4;
/*
* Similar to TrieADT we create BurstTrie
*/
#region BurstTrie
Trie root;// Starting point of BurstTrie
public BurstSort()
{
root = null;
}
/*
* "Insertion phase"
* Inserting string prefix in trie and suffixes in buckets
*/
public void insert(string[] s)
{
foreach (string i in s)
{
root = insert(i, root);
}
/* After insertion phase we traverse to sort the data */
traverse();
}
private Trie insert(string s, Trie node)
{
if (node == null)
{
Trie n = new Trie();
char c = charAt(s);
n.insert(c, s);
return n;
}
else
{
node.insert(s[0], s);
return node;
}
}
/*
* "Traversal phase"
* Travel BurstTrie from left to right
* to sort data
*/
public void traverse()
{
root.traverse(0);
}
//to print we use print function
public void print()
{
root.print(0);
}
// utility function return the First character of string
public char charAt(string s)
{
return s[0];
}
#endregion
//Trie node
class Trie
{
Buckets[] buckets;
List<int> c;
public Trie()
{
c = new List<int>();
buckets = new Buckets[alphbet];
}
/** Inserting Suffixes of particular prefix in buckets */
public void insert(int x, string s)
{
if (!c.Contains(x))
{
buckets[x] = new Buckets();
buckets[x].insert(s.Substring(1));
c.Add(x);
}
else
{
buckets[x].insert(s.Substring(1));
}
}
public void traverse(int ch)
{
//Sorting Prefixes
for (int i = 0; i < c.Count; i++)
{
for (int j = 0; j < c.Count; j++)
{
if (c[i] < c[j])
{
int temp = c[i];
c[i] = c[j];
c[j] = temp;
}
}
}
for (int i = 0; i < c.Count; i++)//Calling buckets
{
buckets[c[i]].traverse(ch, c[i]);
}
}
public void print(int ch)
{
for (int i = 0; i < c.Count; i++)//Calling buckets
{
buckets[c[i]].print(ch, c[i]);
}
}
}
/*
* Bucket to store the suffixes of string
* Array based impelementation of buckets
*/
class Buckets
{
string[] buc; //Bucket
int h; //pointer help in inserting elements in bucket
int growthFactor; //by which we increase the size of bucket
int initialSize; //Starting size of bucket
int finalLimit; //Time to burst
Trie root;
public Buckets()
{
root = null;
finalLimit = threshold;
initialSize = bucket_start_size;
buc = new string[initialSize];
h = 0;
growthFactor = bucket_growth_factor;
}
//Inerting elements in buckets
public void insert(string s)
{
if (h < initialSize && h != finalLimit)//When bucket is not full
{
buc[h] = s;
h++;
}
if (h == initialSize && h != finalLimit)//When bucket is full
{
buc = increaseSize(buc);//Increasing the size of bucket
}
if (h >= finalLimit)//When bucket reaches threshold
{
/*
* "BURSTING PHASE"
* Create a new trie node
* Inserting bucket elements in new trie node
*/
for (int i = 0; i < h; i++)
{
insert(buc[i]);//Calling the same method which we use before
}
}
}
/*
* Increase size of bucket by bucket_growth_factor
*/
public string[] increaseSize(string[] b)
{
initialSize *= growthFactor;
string[] a = new string[initialSize];
for (int i = 0; i < b.Length; i++)
{
a[i] = b[i];
}
return a;
}
/** traversing the buckets */
public void traverse(int c, int ch)
{
if (root == null)
{
for (int i = 0; i < h; i++)
{
if (h > 1)//when the size of bucket is more than one
{
/** Applying Multikey_Quick_Sort to sort buckets */
MultikeyQuickSort sort = new MultikeyQuickSort();
buc = sort.sort(buc, 0, h - 1, 0);
}
}
}
else
{
root.traverse(ch);
}
}
//To print the sorted data
public void print(int c, int ch)
{
if (root == null)
{
for (int i = 0; i < h; i++)
{
Console.Write(Convert.ToChar(c));
Console.WriteLine(Convert.ToChar(ch) + buc[i]);
}
}
else
{
root.print(ch);
}
}
}
class MultikeyQuickSort
{
/*
* Multikey_Quick_Sort divide array into three parts
*
* 1-greater than pivot
* 2-less than pivot
* 3-equal to pivot
*
*/
public String[] sort(String[] a, int lo, int hi, int d)
{
if (hi <= lo) return a;
int lt = lo, gt = hi;
string s1 = a[lo];//pivot
int v = -1;
if (d < s1.Length)
{
v = s1[d];
}
int i = lo + 1;
while (i <= gt)
{
string s = a[i];
int t = -1;
if (d < s1.Length)
{
t = s[d];
}
if (t < v) interChange(a, lt++, i++); //part 1
else if (t > v) interChange(a, i, gt--); //part 2
else i++; //part 3
}
sort(a, lo, lt - 1, d);
if (v >= 0)
sort(a, lt, gt, d + 1);
sort(a, gt + 1, hi, d);
return a;
}
//utility function which interchange the string values
private void interChange(String[] a, int i, int j)
{
String temp = a[i];
a[i] = a[j];
a[j] = temp;
}
}
}
}
Referências
- ↑ «BurstSort/burstsort at master · makhan1101/BurstSort». GitHub (em inglês). Consultado em 24 de setembro de 2021
Ligações externas
editar- O artigo original sobre o burstsort: Cache-Conscious Sorting of Large Sets of Strings with Dynamic Tries
- Um programa derivado do burstsort (C-burstsort), mais rápido do que o burstsort: Cache-Efficient String Sorting Using Copying
- O tipo de dados utilizado no burstsort: Burst Tries: A Fast, Efficient Data Structure for String Keys
- Efficient Trie-Based Sorting of Large Sets of Strings
- Engineering Burstsort: Towards Fast In-Place String Sorting
- Uma implementação do burstsort em C++: Free C++ Copy-Burstsort Library
- Uma implementação do burstsort em Java: burstsort4j