字母表
1 键索引计数法:低位优先(LSD)的字符串排序
2 键索引计数法:高位优先(MSD)的字符串排序
/**
 * Most-significant-digit-first (MSD) radix sort for strings.
 *
 * <p>Strings are partitioned on their d-th character with key-indexed
 * counting, and each partition is then sorted recursively on the next
 * character. Sub-arrays of at most {@code M + 1} elements are handed to
 * {@code Insertion.sort(a, lo, hi, d)}, which is defined elsewhere
 * (presumably an insertion sort that compares strings from character
 * {@code d} onward - confirm against its definition).
 *
 * <p>Character codes must be smaller than {@code R}; larger codes overflow
 * the counting array. The scratch buffer is a shared static field, so
 * concurrent calls to {@link #sort(String[])} would interfere with each other.
 */
public static class MSD {
    private static final int R = 256;  // radix (alphabet size)
    private static final int M = 15;   // cutoff for small sub-arrays
    private static String[] aux;       // scratch array used while distributing

    /**
     * Returns the code of the d-th character of {@code s}, or -1 when
     * {@code s} has no such character, so that a string which has ended
     * sorts before every longer string sharing its prefix.
     */
    private static int charAt(String s, int d) {
        if (d < s.length()) {
            return s.charAt(d);
        }
        return -1;
    }

    /**
     * Sorts the array in place.
     *
     * @param a the strings to sort
     */
    public static void sort(String[] a) {
        int N = a.length;
        aux = new String[N];
        sort(a, 0, N - 1, 0);
    }

    /** Sorts a[lo..hi] using the d-th character as the leading key. */
    private static void sort(String[] a, int lo, int hi, int d) {
        if (hi <= lo + M) {
            Insertion.sort(a, lo, hi, d);
            return;
        }

        // Frequency counts. The +2 offset leaves slot 1 for the -1
        // "end of string" code and slot 0 as the prefix-sum base.
        int[] count = new int[R + 2];
        for (int i = lo; i <= hi; i++) {
            count[charAt(a[i], d) + 2]++;
        }

        // Turn frequencies into start indices.
        for (int r = 0; r < R + 1; r++) {
            count[r + 1] += count[r];
        }

        // Distribute into the scratch array (stable).
        for (int i = lo; i <= hi; i++) {
            aux[count[charAt(a[i], d) + 1]++] = a[i];
        }

        // Copy back. The bound is inclusive: stopping at hi - 1 would leave
        // a[hi] holding its old value and lose the last distributed string.
        for (int i = lo; i <= hi; i++) {
            a[i] = aux[i - lo];
        }

        // After distribution count[r] is the start of the bucket for
        // character r, so recurse into each bucket on the next character.
        for (int r = 0; r < R; r++) {
            sort(a, lo + count[r], lo + count[r + 1] - 1, d + 1);
        }
    }
}
再次强调:在递归的末端(小型子数组)切换为插入排序等非递归排序,将大大提高性能
高位优先排序的弱点:含有大量等值键(或较长公共前缀)的子数组会使排序明显变慢
3 三向字符串快速排序(三向切分,未对小数组进行优化)
/**
 * Three-way string quicksort: partitions on one character at a time into
 * "smaller", "equal" and "larger" groups, and only advances to the next
 * character inside the "equal" group.
 *
 * <p>Relies on an {@code exch(a, i, j)} helper that is not defined in this
 * class (presumably swaps a[i] and a[j] - confirm against its definition).
 */
public static class Quick3string {

    /** Code of the d-th character of {@code s}, or -1 past the end of the string. */
    private static int charAt(String s, int d) {
        return d < s.length() ? s.charAt(d) : -1;
    }

    /**
     * Sorts the array in place.
     *
     * @param a the strings to sort
     */
    public static void sort(String[] a) {
        int last = a.length - 1;
        sort(a, 0, last, 0);
    }

    /** Sorts a[lo..hi], assuming all of them agree on their first d characters. */
    private static void sort(String[] a, int lo, int hi, int d) {
        if (lo >= hi) {
            return;
        }

        final int pivot = charAt(a[lo], d);
        int below = lo;      // a[lo..below-1] have a smaller d-th character
        int above = hi;      // a[above+1..hi] have a larger d-th character
        int scan = lo + 1;   // a[below..scan-1] equal the pivot character

        while (scan <= above) {
            final int current = charAt(a[scan], d);
            if (current == pivot) {
                scan++;
            } else if (current < pivot) {
                exch(a, below, scan);
                below++;
                scan++;
            } else {
                exch(a, scan, above);
                above--;
            }
        }

        sort(a, lo, below - 1, d);
        // A pivot of -1 means the equal group consists of strings that have
        // already ended, so there is no further character to sort on.
        if (pivot >= 0) {
            sort(a, below, above, d + 1);
        }
        sort(a, above + 1, hi, d);
    }
}
4普通快速排序
5 Java 中的排序:Arrays.sort 对对象数组使用 TimSort(优化的归并排序),对基本类型数组使用双轴快速排序(Dual-Pivot Quicksort)
6基于单词查找树的符号表
/**
 * Symbol table with string keys backed by an R-way trie.
 *
 * <p>Each node owns an array of {@code R} child links indexed by character
 * code, so key characters must have codes below {@code R}.
 *
 * @param <Value> the type of the stored values
 */
public static class TrieST<Value> {
    private static int R = 256;  // number of child links per node
    private Node root;           // null while the table is empty

    /** Trie node: an optional value plus one link per possible next character. */
    private static class Node {
        private Object val;                  // stored as Object; cast back in get()
        private Node[] next = new Node[R];
    }

    /**
     * Looks up the value stored under {@code key}.
     *
     * @param key the key to search for
     * @return the associated value, or {@code null} if no value is stored
     */
    @SuppressWarnings("unchecked")  // val is only ever assigned from a Value in put()
    public Value get(String key) {
        Node found = get(root, key, 0);
        return found == null ? null : (Value) found.val;
    }

    /**
     * Walks down from {@code x}, consuming key characters starting at index
     * {@code d}, and returns the node reached once the key is exhausted, or
     * {@code null} if the path breaks off first.
     */
    private Node get(Node x, String key, int d) {
        Node node = x;
        int depth = d;
        while (node != null && depth != key.length()) {
            node = node.next[key.charAt(depth)];
            depth++;
        }
        return node;
    }

    /**
     * Stores {@code val} under {@code key}, replacing any previous value.
     *
     * @param key the key
     * @param val the value to associate with the key
     */
    public void put(String key, Value val) {
        root = put(root, key, val, 0);
    }

    /**
     * Inserts into the sub-trie rooted at {@code x}, creating missing nodes,
     * and returns the (possibly new) root of that sub-trie.
     */
    private Node put(Node x, String key, Value val, int d) {
        Node node = (x != null) ? x : new Node();
        if (d != key.length()) {
            // Descend along the link for the d-th character.
            char c = key.charAt(d);
            node.next[c] = put(node.next[c], key, val, d + 1);
        } else {
            node.val = val;
        }
        return node;
    }
}
单词查找树删除逻辑
/**
 * Removes {@code key} from the trie by delegating to the recursive helper
 * and re-rooting the trie at whatever that helper returns.
 *
 * @param key the key to remove
 */
public void delete(String key) {
    Node newRoot = delete(root, key, 0);
    root = newRoot;
}
/**
 * Removes {@code key} from the sub-trie rooted at {@code x}.
 *
 * @param x   root of the sub-trie, may be {@code null}
 * @param key the key being removed
 * @param d   index of the key character that selects the next link
 * @return {@code x} if the node is still needed, or {@code null} to tell the
 *         caller to unlink it
 */
public Node delete(Node x, String key, int d) {
    if (x == null) {
        return null;
    }

    if (d != key.length()) {
        // Not at the end of the key yet: recurse and re-attach whatever the
        // lower level returns (null unlinks the child).
        char c = key.charAt(d);
        x.next[c] = delete(x.next[c], key, d + 1);
    } else {
        // Reached the node for the whole key: drop its value.
        x.val = null;
    }

    // The node survives if it still carries a value or has any child.
    boolean stillNeeded = x.val != null;
    for (int slot = 0; !stillNeeded && slot < R; slot++) {
        stillNeeded = x.next[slot] != null;
    }
    return stillNeeded ? x : null;
}
三向单词查找树
/**
 * Symbol table with string keys backed by a ternary search trie (TST).
 *
 * <p>Each node holds one character and three links: {@code left} and
 * {@code right} lead to alternatives for the same key position, while
 * {@code mid} leads to the next key position.
 *
 * @param <Value> the type of the stored values
 */
public class TST<Value> {
    private Node root;  // null while the table is empty

    /** TST node: one character, three links and an optional value. */
    private class Node {
        char c;
        Node left, mid, right;
        Value val;
    }

    /**
     * Looks up the value stored under {@code key}.
     *
     * @param key the key to search for
     * @return the associated value, or {@code null} if no value is stored;
     *         the empty key can never be stored, so it always yields {@code null}
     */
    public Value get(String key) {
        if (key.isEmpty()) {
            return null;
        }
        Node x = get(root, key, 0);
        if (x == null) {
            return null;
        }
        return x.val;
    }

    /**
     * Returns the node that represents {@code key} in the sub-trie rooted at
     * {@code x}, matching from character index {@code d}, or {@code null}
     * if the path does not exist. Requires {@code d < key.length()}.
     */
    private Node get(Node x, String key, int d) {
        if (x == null) {
            return null;
        }
        char c = key.charAt(d);
        if (c < x.c) {
            return get(x.left, key, d);
        } else if (c > x.c) {
            return get(x.right, key, d);
        } else if (d < key.length() - 1) {
            return get(x.mid, key, d + 1);  // character matched, move to the next one
        } else {
            return x;                       // matched the last character of the key
        }
    }

    /**
     * Stores {@code val} under {@code key}, replacing any previous value.
     *
     * @param key the key; must contain at least one character
     * @param val the value to associate with the key
     * @throws StringIndexOutOfBoundsException if {@code key} is empty
     */
    public void put(String key, Value val) {
        root = put(root, key, val, 0);
    }

    /**
     * Inserts into the sub-trie rooted at {@code x}, creating missing nodes,
     * and returns the (possibly new) root of that sub-trie.
     */
    private Node put(Node x, String key, Value val, int d) {
        char c = key.charAt(d);
        if (x == null) {
            x = new Node();
            x.c = c;
        }
        if (c < x.c) {
            x.left = put(x.left, key, val, d);
        } else if (c > x.c) {
            x.right = put(x.right, key, val, d);
        } else if (d < key.length() - 1) {
            x.mid = put(x.mid, key, val, d + 1);
        } else {
            x.val = val;
        }
        return x;
    }
}
Knuth-Morris-Pratt字符串查找算法
/**
 * Knuth-Morris-Pratt substring search driven by a deterministic finite
 * automaton (DFA) built from the pattern.
 *
 * <p>{@code dfa[c][j]} is the state to move to after reading character
 * {@code c} in state {@code j} (that is, after {@code j} pattern characters
 * have matched). Pattern and text characters must have codes below 256,
 * the size of the transition table.
 */
public class KMP {
    private final String pat;
    private final int[][] dfa;

    /**
     * Builds the DFA for {@code pat}.
     *
     * @param pat the pattern to search for; an empty pattern matches at index 0
     */
    public KMP(String pat) {
        this.pat = pat;
        int M = pat.length();
        int R = 256;
        dfa = new int[R][M];
        if (M > 0) {
            dfa[pat.charAt(0)][0] = 1;
            // X is the restart state: the state the DFA would be in after
            // reading pat[1..j-1] from state 0.
            for (int X = 0, j = 1; j < M; j++) {
                // Mismatch transitions: behave as the restart state would.
                for (int c = 0; c < R; c++) {
                    dfa[c][j] = dfa[c][X];
                }
                // Match transition and restart-state update happen once per
                // column, after the whole column has been copied from X.
                dfa[pat.charAt(j)][j] = j + 1;
                X = dfa[pat.charAt(j)][X];
            }
        }
    }

    /**
     * Simulates the DFA on {@code txt}.
     *
     * @param txt the text to search in
     * @return the index of the first occurrence of the pattern, or
     *         {@code txt.length()} if the pattern does not occur
     */
    public int search(String txt) {
        int i, j, N = txt.length(), M = pat.length();
        for (i = 0, j = 0; i < N && j < M; i++) {
            j = dfa[txt.charAt(i)][j];
        }
        if (j == M) {
            return i - M;  // reached the accept state: match ends just before i
        }
        return N;          // ran out of text without reaching the accept state
    }
}
Boyer-Moore字符串查找算法:不匹配字符的启发式方法(从右向左匹配模式字符串)
跳跃表 right[]:记录每个字符在模式字符串中出现的最右位置(未出现的字符为 -1)
/**
 * Boyer-Moore substring search using the mismatched-character rule.
 *
 * <p>The pattern is compared against the text from right to left; on a
 * mismatch the pattern is shifted according to the rightmost position at
 * which the offending text character occurs in the pattern. Pattern and
 * text characters must have codes below 256, the size of the skip table.
 */
public class BoyerMoore {
    private int[] right;  // rightmost index of each character in pat, -1 if absent
    private String pat;

    /**
     * Precomputes the skip table for {@code pat}.
     *
     * @param pat the pattern to search for
     */
    BoyerMoore(String pat) {
        this.pat = pat;
        final int alphabetSize = 256;
        right = new int[alphabetSize];

        // Characters that never occur in the pattern keep the value -1.
        int code = 0;
        while (code < alphabetSize) {
            right[code] = -1;
            code++;
        }

        // Later occurrences overwrite earlier ones, leaving the rightmost index.
        final int patLen = pat.length();
        for (int pos = 0; pos < patLen; pos++) {
            right[pat.charAt(pos)] = pos;
        }
    }

    /**
     * Searches {@code txt} for the pattern.
     *
     * @param txt the text to search in
     * @return the index of the first occurrence of the pattern, or
     *         {@code txt.length()} if the pattern does not occur
     */
    public int search(String txt) {
        final int textLen = txt.length();
        final int patLen = pat.length();

        int offset = 0;
        while (offset <= textLen - patLen) {
            int shift = shiftAt(txt, offset, patLen);
            if (shift == 0) {
                return offset;  // every pattern character matched
            }
            offset += shift;
        }
        return textLen;  // no match
    }

    /**
     * Compares the pattern with the text at {@code offset}, right to left.
     * Returns 0 on a full match, otherwise the distance (at least 1) by which
     * the pattern should be moved to the right.
     */
    private int shiftAt(String txt, int offset, int patLen) {
        for (int j = patLen - 1; j >= 0; j--) {
            char seen = txt.charAt(offset + j);
            if (pat.charAt(j) != seen) {
                // Line up the rightmost occurrence of the text character with
                // it; never move backwards or stand still.
                return Math.max(1, j - right[seen]);
            }
        }
        return 0;
    }
}
Rabin-Karp指纹字符串查找算法
/**
 * Rabin-Karp fingerprint substring search.
 *
 * <p>The pattern is hashed once; the text is then scanned with a rolling
 * hash over a window of the pattern's length. This is the Monte Carlo
 * variant: {@link #check(int)} always accepts, so a hash collision is
 * reported as a match.
 */
public class RabinKarp {
    private final String pat;     // pattern (only needed by a Las Vegas check)
    private final long patHash;   // hash of the pattern
    private final int M;          // pattern length
    private final long Q;         // a large prime modulus
    private final int R = 256;    // alphabet size
    private final long RM;        // R^(M-1) % Q, used to remove the leading character

    /**
     * Prepares a search for {@code pat}.
     *
     * @param pat the pattern to search for
     */
    public RabinKarp(String pat) {
        this.pat = pat;
        this.M = pat.length();
        Q = longRandomPrime();
        long power = 1;
        for (int i = 0; i < M - 1; i++) {
            power = (R * power) % Q;
        }
        RM = power;
        patHash = hash(pat, M);
    }

    /**
     * Monte Carlo verification: always accepts. A Las Vegas version would
     * compare the pattern with the M text characters starting at {@code i}.
     *
     * @param i index in the text at which the candidate match starts
     * @return always {@code true}
     */
    public boolean check(int i) {
        return true;
    }

    /** Computes the hash of key[0..M-1] with Horner's rule, modulo Q. */
    private long hash(String key, int M) {
        long h = 0;
        for (int j = 0; j < M; j++) {
            h = (R * h + key.charAt(j)) % Q;
        }
        return h;
    }

    /**
     * Searches {@code txt} for a window whose hash equals the pattern's hash.
     *
     * @param txt the text to search in
     * @return the index of the first window accepted as a match, or
     *         {@code txt.length()} if there is none
     */
    public int search(String txt) {
        int N = txt.length();
        if (N < M) {
            return N;  // the text cannot contain the pattern
        }
        long txtHash = hash(txt, M);
        if (patHash == txtHash && check(0)) {
            return 0;  // match at the very beginning
        }
        // Start at M: the first window txt[0..M-1] is already hashed, and
        // each step drops txt[i-M] from the window and appends txt[i].
        for (int i = M; i < N; i++) {
            txtHash = (txtHash + Q - RM * txt.charAt(i - M) % Q) % Q;
            txtHash = (txtHash * R + txt.charAt(i)) % Q;
            if (patHash == txtHash) {
                if (check(i - M + 1)) {
                    return i - M + 1;
                }
            }
        }
        return N;  // no match
    }

    /**
     * Returns a random 31-bit number that is prime with very high
     * probability. Keeping Q below 2^31 ensures the products formed in
     * {@link #hash} and {@link #search} fit in a {@code long}.
     */
    private long longRandomPrime() {
        return java.math.BigInteger.probablePrime(31, new java.util.Random()).longValue();
    }
}
正则表达式的模式匹配(grep)
/**
 * Regular-expression matcher based on a nondeterministic finite automaton.
 *
 * <p>State {@code i} corresponds to the i-th character of the regular
 * expression and state {@code M} is the accept state. Match transitions are
 * implied by the {@code re} array; epsilon transitions are stored as edges
 * of {@code G}. {@code Digraph}, {@code DirectedDFS}, {@code Bag} and
 * {@code Stack} are defined elsewhere; the comments below assume
 * {@code DirectedDFS.marked(v)} reports whether {@code v} is reachable from
 * the given source(s) - confirm against its definition.
 */
public class NFA {
    private char[] re;   // match transitions
    private Digraph G;   // epsilon transitions
    private int M;       // number of regexp characters, and index of the accept state

    /**
     * Builds the epsilon-transition digraph for {@code regexp}.
     *
     * @param regexp the regular expression
     */
    public NFA(String regexp) {
        Stack<Integer> ops = new Stack<Integer>();
        re = regexp.toCharArray();
        M = re.length;
        G = new Digraph(M + 1);

        for (int i = 0; i < M; i++) {
            final char symbol = re[i];
            int groupStart = i;  // left end of the operand a following '*' applies to

            if (symbol == ')') {
                int top = ops.pop();
                if (re[top] == '|') {
                    // Alternation: '(' -> first state after '|', and '|' -> ')'.
                    groupStart = ops.pop();
                    G.addEdge(groupStart, top + 1);
                    G.addEdge(top, i);
                } else {
                    groupStart = top;
                }
            } else if (symbol == '(' || symbol == '|') {
                ops.push(i);
            }

            // Closure: look one character ahead for '*'.
            boolean starFollows = i < M - 1 && re[i + 1] == '*';
            if (starFollows) {
                G.addEdge(groupStart, i + 1);
                G.addEdge(i + 1, groupStart);
            }

            // Metacharacters fall through to the next state without input.
            if (symbol == '(' || symbol == '*' || symbol == ')') {
                G.addEdge(i, i + 1);
            }
        }
    }

    /**
     * Tests whether the NFA accepts the whole of {@code txt}.
     *
     * @param txt the text to test
     * @return {@code true} if the accept state is among the states the NFA
     *         can be in after reading all of {@code txt}
     */
    public boolean recognizes(String txt) {
        // States that can be entered from state 0 before reading any input.
        Bag<Integer> pc = markedStates(new DirectedDFS(G, 0));

        for (int i = 0; i < txt.length(); i++) {
            final char ch = txt.charAt(i);

            // States entered by a match transition on txt[i].
            Bag<Integer> match = new Bag<Integer>();
            for (int v : pc) {
                if (v >= M) {
                    continue;  // the accept state has no match transition
                }
                if (re[v] == ch || re[v] == '.') {
                    match.add(v + 1);
                }
            }

            // Extend them along epsilon transitions.
            pc = markedStates(new DirectedDFS(G, match));
        }

        for (int v : pc) {
            if (v == M) {
                return true;
            }
        }
        return false;
    }

    /** Collects, in increasing order, every vertex of G that {@code dfs} has marked. */
    private Bag<Integer> markedStates(DirectedDFS dfs) {
        Bag<Integer> states = new Bag<Integer>();
        for (int v = 0; v < G.V(); v++) {
            if (dfs.marked(v)) {
                states.add(v);
            }
        }
        return states;
    }
}
霍夫曼压缩
变长前缀码
前缀码单词查找树
树的加权外部路径长度
/**
 * Huffman compression and expansion.
 *
 * <p>{@link #compress()} builds a prefix-code trie from character
 * frequencies and writes the trie, the character count and the encoded
 * characters; {@link #expand()} reads them back. All input and output goes
 * through {@code BinaryStdIn} / {@code BinaryStdOut}, and the priority queue
 * is a {@code MinPQ}; these are defined elsewhere. Character codes must be
 * smaller than {@code R}, the size of the frequency and code tables.
 */
public static class Huffman {
    private static final int R = 256;  // alphabet size (extended ASCII)

    /** Node of the Huffman trie; leaves carry characters, internal nodes do not. */
    private static class Node implements Comparable<Node> {
        private final char ch;      // meaningful for leaves only
        private final int freq;     // used only while building the trie
        private final Node left, right;

        Node(char ch, int freq, Node left, Node right) {
            this.ch = ch;
            this.freq = freq;
            this.left = left;
            this.right = right;
        }

        public boolean isLeaf() {
            return left == null && right == null;
        }

        /**
         * Orders nodes by frequency. This must be a pure comparison: an
         * assignment here would overwrite this node's frequency and return
         * the other node's frequency instead of an ordering.
         */
        @Override
        public int compareTo(Node that) {
            return Integer.compare(this.freq, that.freq);
        }
    }

    /**
     * Reads a trie, a character count N and then N encoded characters from
     * {@code BinaryStdIn}, writing each decoded character to {@code BinaryStdOut}.
     */
    public static void expand() {
        Node root = readTrie();
        int N = BinaryStdIn.readInt();
        for (int i = 0; i < N; i++) {
            // Follow one bit per level until a leaf is reached:
            // true goes right, false goes left.
            Node x = root;
            while (!x.isLeaf()) {
                if (BinaryStdIn.readBoolean()) {
                    x = x.right;
                } else {
                    x = x.left;
                }
            }
            BinaryStdOut.write(x.ch);
        }
        BinaryStdOut.close();
    }

    /** Builds the table that maps each character occurring in the trie to its bit string. */
    private static String[] buildCode(Node root) {
        String[] st = new String[R];
        buildCode(st, root, "");
        return st;
    }

    /** Recursive helper: {@code s} is the path of '0'/'1' steps from the root to {@code x}. */
    private static void buildCode(String[] st, Node x, String s) {
        if (x.isLeaf()) {
            st[x.ch] = s;
            return;
        }
        buildCode(st, x.left, s + '0');
        buildCode(st, x.right, s + '1');
    }

    /**
     * Builds the Huffman trie for the given frequencies.
     *
     * @param freq {@code freq[c]} is the number of occurrences of character {@code c}
     * @return the root of the trie
     */
    private static Node buildTrie(int[] freq) {
        // Start with one single-node tree per character that occurs.
        MinPQ<Node> pq = new MinPQ<Node>();
        for (char c = 0; c < R; c++) {
            if (freq[c] > 0) {
                pq.insert(new Node(c, freq[c], null, null));
            }
        }
        // Repeatedly merge the two trees of lowest frequency.
        while (pq.size() > 1) {
            Node x = pq.delMin();
            Node y = pq.delMin();
            Node parent = new Node('\0', x.freq + y.freq, x, y);
            pq.insert(parent);
        }
        return pq.delMin();
    }

    /** Writes the trie in preorder: {@code true} plus the character for a leaf, {@code false} for an internal node. */
    private static void writeTrie(Node x) {
        if (x.isLeaf()) {
            BinaryStdOut.write(true);
            BinaryStdOut.write(x.ch);
            return;
        }
        BinaryStdOut.write(false);
        writeTrie(x.left);
        writeTrie(x.right);
    }

    /** Reads a trie in the format produced by {@link #writeTrie(Node)}; frequencies are not stored, so they are set to 0. */
    private static Node readTrie() {
        if (BinaryStdIn.readBoolean()) {
            return new Node(BinaryStdIn.readChar(), 0, null, null);
        }
        return new Node('\0', 0, readTrie(), readTrie());
    }

    /**
     * Reads all input from {@code BinaryStdIn} as a string and writes, to
     * {@code BinaryStdOut}: the trie, the number of characters, and the
     * code of every input character.
     */
    public static void compress() {
        // Read the input.
        String s = BinaryStdIn.readString();
        char[] input = s.toCharArray();

        // Tabulate frequencies.
        int[] freq = new int[R];
        for (int i = 0; i < input.length; i++) {
            freq[input[i]]++;
        }

        // Build the trie and the code table derived from it.
        Node root = buildTrie(freq);
        String[] st = buildCode(root);

        // Write the trie (needed for decoding) and the character count.
        writeTrie(root);
        BinaryStdOut.write(input.length);

        // Encode the input: '1' is written as true, anything else as false.
        for (int i = 0; i < input.length; i++) {
            String code = st[input[i]];
            for (int j = 0; j < code.length(); j++) {
                BinaryStdOut.write(code.charAt(j) == '1');
            }
        }
        BinaryStdOut.close();
    }
}