字符串算法（待修订）

字母表

1 键索引计数法与低位优先（LSD）的字符串排序

2 基于键索引计数法的高位优先（MSD）字符串排序

/**
 * MSD (most-significant-digit-first) radix sort for strings.
 *
 * Strings are distributed into buckets by their d-th character using
 * key-indexed counting, then every bucket is sorted recursively on the
 * next character. Small subarrays are handed to an insertion sort.
 *
 * Characters are used directly as array indices, so every character must be
 * smaller than R (256). The shared static {@code aux} array makes this class
 * unsafe for concurrent sorts.
 */
public static class MSD{
    private static int R = 256;// radix (alphabet size)
    private static final int M = 15;// cutoff: subarrays with at most M+1 entries use insertion sort
    private static String[] aux;// auxiliary array for the distribution step

    /** Returns the d-th character of s as an int, or -1 when s has no d-th character. */
    private static int charAt(String s,int d){
        if(d < s.length()) return s.charAt(d); else return -1;
    }

    /**
     * Sorts the array of strings in place.
     *
     * @param a the strings to sort
     */
    public static void sort(String[] a){
        int N = a.length;
        aux = new String[N];
        sort(a,0,N-1,0);
    }

    /** Sorts a[lo..hi] using the d-th character as the key; earlier characters are already equal. */
    private static void sort(String[] a,int lo,int hi,int d){
        if(hi <= lo + M){
            // Insertion is declared elsewhere; presumably it sorts a[lo..hi] comparing from character d on.
            Insertion.sort(a,lo,hi,d);
            return;
        }
        // Slot 0 is unused, slot 1 counts "end of string" (-1), slots 2..R+1 count real characters.
        int[] count = new int[R+2];
        for(int i = lo ; i <= hi ; i++){// compute frequency counts
            count[charAt(a[i],d) + 2]++;
        }
        for(int r = 0; r < R+1 ; r++){// transform counts into start indices
            count[r+1] += count[r];
        }
        for (int i = lo; i <= hi ; i++){// distribute
            aux[count[charAt(a[i],d) + 1]++] = a[i];
        }
        // Copy back. The bound must be inclusive: a[hi] is part of the range,
        // otherwise the last slot keeps its stale pre-distribution value.
        for (int i = lo; i <= hi; i++){
            a[i] = aux[i - lo];
        }
        // Recursively sort each character bucket on the next character.
        // The "end of string" bucket (before count[0]) is already in final order.
        for (int r = 0; r < R; r++) {
            sort(a,lo + count[r],lo + count[r+1] -1 , d+1);
        }
    }
}

再次强调：在递归排序算法中，当子数组足够小时改用非递归的排序（如插入排序），可以大幅提高性能。
高位优先排序中等值键的危害：含有大量等值键的子数组会使排序明显变慢。

3 三向字符串快速排序（未对小数组进行优化）

/**
 * Three-way string quicksort (no small-subarray cutoff).
 *
 * Partitions on the d-th character into "less", "equal" and "greater"
 * ranges, then recurses; only the "equal" range advances to character d+1.
 */
public static class Quick3string{
    /** Returns the d-th character of s as an int, or -1 when s is too short. */
    private static int charAt(String s,int d){
        return d < s.length() ? s.charAt(d) : -1;
    }

    /**
     * Sorts the array of strings in place.
     *
     * @param a the strings to sort
     */
    public static void sort(String[] a){
        sort(a, 0, a.length - 1, 0);
    }

    /** Sorts a[lo..hi], whose entries already agree on their first d characters. */
    private static void sort(String[] a,int lo,int hi,int d){
        if(lo >= hi) return;
        int pivot = charAt(a[lo],d);
        int less = lo;      // a[lo..less-1] have a smaller d-th character
        int greater = hi;   // a[greater+1..hi] have a larger d-th character
        for (int scan = lo + 1; scan <= greater; ) {
            int cur = charAt(a[scan],d);
            if (cur == pivot) {
                scan++;
            } else if (cur < pivot) {
                // exch is declared elsewhere; presumably swaps the two array entries
                exch(a,less,scan);
                less++;
                scan++;
            } else {
                exch(a,scan,greater);
                greater--;
            }
        }
        sort(a,lo,less-1,d);
        // pivot == -1 means every string in the middle range ended here: nothing left to compare
        if(pivot >= 0) sort(a,less,greater,d+1);
        sort(a,greater+1,hi,d);
    }
}

4普通快速排序
5java中的排序，Arrays中使用的是TimSort（优化的merge）

6基于单词查找树的符号表

/**
 * Symbol table with string keys backed by an R-way trie.
 *
 * Each node owns an array of R child links indexed directly by character,
 * so key characters must be smaller than R (256).
 */
public static class TrieST<Value>{
    private static int R = 256;// alphabet size
    private Node root;

    /** Trie node: an optional value plus one link per possible character. */
    private static class Node{
        private Object val;
        private Node[] next = new Node[R];
    }

    /**
     * Looks up the value stored under key.
     *
     * @param key the key to search for
     * @return the associated value, or null when the key is absent
     */
    public Value get(String key){
        Node found = get(root,key,0);
        return found == null ? null : (Value)found.val;
    }

    /** Walks down from x following key[d..]; returns the node reached, or null if the path breaks. */
    private Node get(Node x,String key,int d){
        Node cur = x;
        for (int pos = d; cur != null && pos != key.length(); pos++) {
            cur = cur.next[key.charAt(pos)];
        }
        return cur;
    }

    /**
     * Associates val with key, replacing any previous value.
     *
     * @param key the key
     * @param val the value to store
     */
    public void put(String key,Value val){
        root = put(root,key,val,0);
    }

    /** Inserts key[d..] below x, creating nodes as needed; returns the (possibly new) subtree root. */
    private Node put(Node x,String key,Value val,int d){
        Node node = (x != null) ? x : new Node();
        if(d != key.length()){
            char c = key.charAt(d);
            node.next[c] = put(node.next[c],key,val,d+1);
        }else{
            // reached the node for the last character: store the value here
            node.val = val;
        }
        return node;
    }
}

单词查找树删除逻辑

/**
 * Removes key (and its value) from the trie.
 *
 * @param key the key to remove
 */
public void delete(String key){
    root = delete(root,key,0);
}

/**
 * Removes key[d..] from the subtrie rooted at x.
 *
 * @return x if the node must stay, or null when the node became empty and
 *         the parent should drop its link to it
 */
public Node delete(Node x,String key,int d){
    if(x == null) return null;
    if(d != key.length()){
        // keep descending, then re-link with whatever the lower level returns
        char c = key.charAt(d);
        x.next[c] = delete(x.next[c],key,d+1);
    }else{
        // node for the last character of key: clear its value
        x.val = null;
    }
    // The node survives if it still holds a value or has at least one child.
    boolean keep = x.val != null;
    for (int i = 0; !keep && i < R; i++) {
        keep = x.next[i] != null;
    }
    return keep ? x : null;
}

三向单词查找树

    /**
     * Symbol table with string keys backed by a ternary search trie.
     *
     * Every node holds one character and three links: left (smaller
     * character), mid (next character of the key) and right (larger
     * character). Keys must contain at least one character.
     */
    public class TST<Value>{
        private Node root;

        /** TST node: a character, three links and an optional value. */
        private class Node{
            char c;
            Node left,mid,right;
            Value val;
        }

        /**
         * Looks up the value stored under key.
         *
         * @param key a non-empty key
         * @return the associated value, or null when the key is absent
         * @throws IllegalArgumentException if key is null or empty
         */
        public Value get(String key){
            requireNonEmpty(key);
            Node x = get(root,key,0);
            if(x == null) return null;
            return x.val;
        }

        /** Returns the node matching the last character of key in the subtrie rooted at x, or null. */
        private Node get(Node x,String key,int d){
            if(x == null) return null;
            char c = key.charAt(d);
            if      (c < x.c) return get(x.left,key,d);
            else if (c > x.c) return get(x.right,key,d);
            else if (d < key.length() - 1)
                return get(x.mid,key,d+1);
            else return x;
        }

        /**
         * Associates val with key, replacing any previous value.
         *
         * @param key a non-empty key
         * @param val the value to store
         * @throws IllegalArgumentException if key is null or empty
         */
        public void put(String key,Value val){
            requireNonEmpty(key);
            root = put(root,key,val,0);
        }

        /** Inserts key[d..] into the subtrie rooted at x; returns the (possibly new) subtrie root. */
        private Node put(Node x,String key,Value val,int d){
            char c = key.charAt(d);
            if(x == null){ x = new Node();x.c = c; }
            if      (c < x.c) x.left = put(x.left,key,val,d);
            else if (c > x.c) x.right = put(x.right,key,val,d);
            else if (d < key.length() - 1)
                x.mid = put(x.mid,key,val,d+1);
            else x.val = val;
            return x;
        }

        /** Rejects keys the trie cannot represent: the recursion always reads key.charAt(0). */
        private void requireNonEmpty(String key){
            if(key == null || key.length() == 0)
                throw new IllegalArgumentException("key must have length >= 1");
        }
    }

Knuth-Morris-Pratt字符串查找算法

/**
 * Knuth-Morris-Pratt substring search driven by a precomputed DFA.
 *
 * dfa[c][j] is the state to move to when character c is read in state j,
 * where state j means "the first j pattern characters are matched".
 * Characters index the table directly, so pattern and text characters
 * must be smaller than 256.
 */
public class KMP{
    private String pat;
    private int[][] dfa;

    /**
     * Builds the DFA for the given pattern.
     *
     * @param pat the pattern to search for; an empty pattern matches at index 0
     */
    public KMP(String pat){
        this.pat = pat;
        int M = pat.length();
        int R = 256;
        dfa = new int[R][M];
        if (M == 0) return;// nothing to build; search() reports a match at 0
        dfa[pat.charAt(0)][0] = 1;
        for (int x = 0,j = 1;j < M;j++){
            // Compute column j of the DFA.
            for (int c = 0;c < R;c++){
                dfa[c][j] = dfa[c][x];// mismatch: behave like the restart state
            }
            // These two steps must run once per column, after the copy loop:
            // running them per character would advance the restart state R times.
            dfa[pat.charAt(j)][j] = j+1;// match: advance to the next state
            x = dfa[pat.charAt(j)][x];// update the restart state
        }
    }

    /**
     * Simulates the DFA over txt.
     *
     * @param txt the text to search in
     * @return the index of the first occurrence of the pattern, or
     *         txt.length() when there is none
     */
    public int search(String txt){
        int i,j,N = txt.length(),M = pat.length();
        for (i = 0,j = 0;i < N && j < M;i++)
            j = dfa[txt.charAt(i)][j];
        if (j == M) return i - M; // found: reached the accepting state
        else return N; // not found: reached the end of the text
    }
    // See Table 5.3.1 of the textbook for a test client.
}

启发式！！
跳跃表

    /**
     * Boyer-Moore substring search using the mismatched-character heuristic.
     *
     * The pattern is compared right to left; on a mismatch the pattern is
     * shifted according to the rightmost position of the offending text
     * character inside the pattern. Characters must be smaller than 256.
     */
    public class BoyerMoore{
        private int[] right;// right[c] = rightmost index of c in the pattern, or -1
        private String pat;

        /** Precomputes the skip table for pat. */
        BoyerMoore(String pat){
            this.pat = pat;
            final int radix = 256;
            right = new int[radix];
            int c = 0;
            while (c < radix) {
                right[c++] = -1;// default: character does not occur in the pattern
            }
            // later occurrences overwrite earlier ones, leaving the rightmost index
            for (int j = 0; j < pat.length(); j++) {
                right[pat.charAt(j)] = j;
            }
        }

        /**
         * Searches txt for the pattern.
         *
         * @param txt the text to search in
         * @return the index of the first occurrence, or txt.length() if none
         */
        public int search(String txt){
            final int n = txt.length();
            final int m = pat.length();
            int i = 0;
            while (i <= n - m) {
                // scan the alignment at i from right to left
                int j = m - 1;
                while (j >= 0 && pat.charAt(j) == txt.charAt(i + j)) {
                    j--;
                }
                if (j < 0) return i;// every character matched
                // shift so the mismatched text character lines up with its
                // rightmost occurrence in the pattern, but always move forward
                i += Math.max(1, j - right[txt.charAt(i + j)]);
            }
            return n;// not found
        }

        // A test client is given in Table 5.3.1 of the textbook.
    }

Rabin-Karp指纹字符串查找算法

/**
 * Rabin-Karp fingerprint substring search.
 *
 * The pattern hash is compared with a rolling hash of each M-character
 * window of the text. As written, {@link #check(int)} always returns true,
 * so a hash match is reported as a match without comparing characters
 * (the Monte Carlo variant).
 */
public class RabinKarp{
    private String pat;// pattern (only needed for the Las Vegas variant)
    private long patHash;// hash of the pattern
    private int M;// pattern length
    private long Q;// a large prime used as the modulus
    private int R = 256;// alphabet size
    private long RM;// R^(M-1) % Q, used to remove the leading character

    /**
     * Precomputes the pattern hash and R^(M-1) % Q.
     *
     * @param pat the pattern to search for
     */
    public RabinKarp(String pat){
        this.pat = pat;
        this.M = pat.length();
        // longRandomPrime is declared elsewhere (textbook exercise 5.3.33);
        // presumably it returns a random prime small enough that R * Q fits in a long.
        Q = longRandomPrime();
        RM = 1;
        for (int i = 0; i < M - 1; i++) {
            RM = (R * RM) % Q;
        }
        patHash = hash(pat,M);
    }

    /**
     * Confirms a hash match at text offset i.
     *
     * Monte Carlo variant: always returns true. A Las Vegas variant would
     * compare the pattern against txt[i..i+M-1] here.
     */
    public boolean check(int i){
        return true;
    }

    /** Computes the hash of key[0..M-1] with Horner's method modulo Q. */
    private long hash(String key,int M){
        long h = 0;
        for (int j = 0; j < M; j++) {
            h = (R * h + key.charAt(j)) % Q;
        }
        return h;
    }

    /**
     * Searches txt for a window whose hash equals the pattern hash.
     *
     * Visibility was widened from private so the class is usable by clients.
     *
     * @param txt the text to search in
     * @return the offset of the first (hash) match, or txt.length() if none
     */
    public int search(String txt){
        int N = txt.length();
        if (N < M) return N;// text shorter than the pattern: no window to hash
        long txtHash = hash(txt,M);
        if(patHash == txtHash&&check(0)) return 0;// match at the very beginning
        // Start at M: the window ending at i drops txt[i-M], which does not exist for i < M.
        for (int i = M; i < N; i++) {
            // remove the leading character, append the trailing one, then compare
            txtHash = (txtHash + Q - RM * txt.charAt(i - M) % Q) % Q;
            txtHash = (txtHash * R + txt.charAt(i)) % Q;
            if(patHash == txtHash){
                if(check(i - M + 1)) return i - M + 1;// match found
            }
        }
        return N;// not found
    }

}

正则表达式的模式匹配（grep）

/**
 * Regular-expression matcher based on a nondeterministic finite automaton.
 *
 * State i corresponds to character i of the pattern and state M is the
 * accepting state. Match transitions are implied by the pattern array;
 * epsilon transitions are stored as edges of the digraph G.
 * Digraph, Bag, DirectedDFS and Stack are declared elsewhere.
 */
public class NFA{
    private char[] re;// pattern characters (match transitions)
    private Digraph G;// epsilon transitions
    private int M;// number of pattern characters

    /**
     * Builds the epsilon-transition digraph for the given regular expression.
     * Handles the metacharacters '(', ')', '|' and '*'.
     *
     * @param regexp the regular expression
     */
    public NFA(String regexp){
        re = regexp.toCharArray();
        M = re.length;
        G = new Digraph(M+1);
        Stack<Integer> ops = new Stack<Integer>();// positions of pending '(' and '|'
        for (int i = 0; i < M; i++) {
            char ch = re[i];
            int lp = i;// start of the sub-expression that ends at i
            if(ch == ')'){
                int or = ops.pop();
                if(re[or] != '|'){
                    lp = or;
                }else{
                    // alternation: '(' may skip to just after '|', and '|' jumps to ')'
                    lp = ops.pop();
                    G.addEdge(lp,or+1);
                    G.addEdge(or,i);
                }
            }else if(ch == '(' || ch == '|'){
                ops.push(i);
            }
            boolean starFollows = i < M-1 && re[i+1] == '*';// one-character lookahead
            if(starFollows){
                G.addEdge(lp,i+1);
                G.addEdge(i+1,lp);
            }
            if(ch == '(' || ch == '*' || ch == ')'){
                G.addEdge(i,i+1);
            }
        }
    }

    /**
     * Tests whether the NFA accepts txt.
     *
     * @param txt the text to test
     * @return true if the accepting state M is reachable after consuming all of txt
     */
    public boolean recognizes(String txt){
        // states reachable from the start state through epsilon transitions
        Bag<Integer> pc = reachable(new DirectedDFS(G,0));
        for (int i = 0; i < txt.length(); i++) {
            char ch = txt.charAt(i);
            // states reached by a match transition on txt[i]
            Bag<Integer> match = new Bag<Integer>();
            for(int v:pc){
                if(v >= M) continue;// the accepting state has no match transition
                if(re[v] == ch || re[v] == '.') match.add(v+1);
            }
            pc = reachable(new DirectedDFS(G,match));
        }
        for (int v : pc) {
            if(v == M) return true;
        }
        return false;
    }

    /** Collects, in increasing order of insertion, every vertex of G that dfs has marked. */
    private Bag<Integer> reachable(DirectedDFS dfs){
        Bag<Integer> states = new Bag<Integer>();
        for (int v = 0; v < G.V(); v++) {
            if(dfs.marked(v)) states.add(v);
        }
        return states;
    }
}

霍夫曼压缩
变长前缀码
前缀码单词查找树
树的加权外部路径长度

/**
 * Huffman compression and expansion over an 8-bit alphabet.
 *
 * The compressed stream written by {@link #compress()} consists of the
 * preorder-encoded code trie, the number of input characters, and then the
 * code bits of every input character. {@link #expand()} reads that layout.
 * MinPQ, BinaryStdIn and BinaryStdOut are declared elsewhere.
 */
public static class Huffman{
    private static int R = 256;// alphabet size

    /** Node of the Huffman code trie; leaves carry a character, internal nodes two children. */
    private static class Node implements Comparable<Node>{
        private final char ch;
        private final int freq;
        private final Node left,right;

        Node(char ch,int freq,Node left,Node right){
            this.ch = ch;
            this.freq = freq;
            this.left = left;
            this.right = right;
        }

        /** A node is a leaf when it has no children. */
        public boolean isLeaf(){
            return left == null && right == null;
        }

        /**
         * Orders nodes by frequency so the priority queue yields the
         * least frequent subtree first. This must be a pure comparison:
         * assigning to freq here would corrupt the weights and the order.
         */
        @Override
        public int compareTo(Node that) {
            return Integer.compare(this.freq, that.freq);
        }
    }

    /**
     * Reads a compressed stream from BinaryStdIn and writes the decoded
     * characters to BinaryStdOut.
     */
    public static void expand(){
        Node root = readTrie();
        int N = BinaryStdIn.readInt();// number of characters to decode
        for (int i = 0; i < N; i++) {
            // follow bits from the root until a leaf is reached: false = left, true = right
            Node x = root;
            while(!x.isLeaf()){
                if(BinaryStdIn.readBoolean()){
                    x = x.right;
                }else {
                    x = x.left;
                }
            }
            BinaryStdOut.write(x.ch);
        }
        BinaryStdOut.close();
    }

    /**
     * Builds the code table from the trie.
     *
     * @return an array indexed by character holding its code as a string of '0'/'1'
     */
    private static String[] buildCode(Node root){
        String[] st = new String[R];
        buildCode(st,root,"");
        return st;
    }

    /** Recursive helper: s is the bit path from the root to x ('0' = left, '1' = right). */
    private static void buildCode(String[] st,Node x,String s){
        if(x.isLeaf()){
            st[x.ch] = s;
            return;
        }
        buildCode(st,x.left,s + '0');
        buildCode(st,x.right,s + '1');
    }

    /**
     * Builds the Huffman trie from character frequencies.
     *
     * NOTE(review): with no positive frequency (empty input) the queue is
     * empty when delMin is called at the end; behavior then depends on MinPQ.
     */
    private static Node buildTrie(int[] freq){
        // start with one single-node tree per character that occurs
        MinPQ<Node> pq = new MinPQ<Node>();
        for (char c = 0; c < R; c++) {
            if(freq[c] > 0){
                pq.insert(new Node(c,freq[c],null,null));
            }
        }
        while(pq.size() > 1){
            // merge the two least frequent trees
            Node x = pq.delMin();
            Node y = pq.delMin();
            Node parent = new Node('\0',x.freq + y.freq,x,y);
            pq.insert(parent);
        }
        return pq.delMin();
    }

    /** Writes the trie in preorder: true + character for a leaf, false for an internal node. */
    private static void writeTrie(Node x){
        if(x.isLeaf()){
            BinaryStdOut.write(true);
            BinaryStdOut.write(x.ch);
            return;
        }
        BinaryStdOut.write(false);
        writeTrie(x.left);
        writeTrie(x.right);
    }

    /** Reads a trie in the format produced by writeTrie; frequencies are not stored, so they are 0. */
    private static Node readTrie(){
        if(BinaryStdIn.readBoolean()){
            return new Node(BinaryStdIn.readChar(),0,null,null);
        }
        return new Node('\0',0,readTrie(),readTrie());
    }


    /**
     * Reads all input from BinaryStdIn, Huffman-encodes it and writes the
     * result (trie, character count, code bits) to BinaryStdOut.
     */
    public static void compress(){
        // read the input
        String s = BinaryStdIn.readString();
        char[] input = s.toCharArray();
        // tabulate frequency counts
        int[] freq = new int[R];
        for (int i = 0; i < input.length; i++) {
            freq[input[i]]++;
        }
        // build the Huffman code trie
        Node root = buildTrie(freq);
        // build the code table
        String[] st = buildCode(root);
        // write the trie for the decoder
        writeTrie(root);
        // write the number of characters
        BinaryStdOut.write(input.length);
        // encode the input one character at a time
        for (int i = 0; i < input.length; i++) {
            String code = st[input[i]];
            for (int j = 0; j < code.length(); j++) {
                if(code.charAt(j) == '1'){
                    BinaryStdOut.write(true);
                }else {
                    BinaryStdOut.write(false);
                }
            }
        }
        BinaryStdOut.close();
    }

}

字符串算法（待修订）

猜你喜欢