Java 超快速文本去重源码

把编码过程中经常用到的一些代码记录下来,下面的代码是一个 Java 超快速文本去重(删除重复行)的实现。

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

/**
 * Command-line utility that removes duplicate lines from a text file.
 *
 * <p>Every line of the input file is loaded into an in-memory {@link HashSet},
 * so the whole set of distinct lines must fit into the JVM heap (hence the
 * {@code -Xmx} hint in the usage text). The distinct lines are then written to
 * the output file, each terminated by {@code "\r\n"}. Because a hash set is
 * used, the order of the lines in the output is unspecified.
 *
 * <p>Both files are read and written as UTF-8.
 */
public class SpeedClear {

    /** Number of lines read between two progress marks printed to the console. */
    private static final int PROGRESS_INTERVAL = 30000;

    /** Charset name used for both the input and the output file. */
    private static final String ENCODING = "utf-8";

    /**
     * Program entry point.
     *
     * <p>With no arguments the usage text is printed; with any argument count
     * other than two an error is printed. In both cases the JVM exits with
     * status 1. Otherwise the file named by the first argument is de-duplicated
     * into the file named by the second argument.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            print();
            System.exit(1);
        }
        if (args.length != 2) {
            System.out.println("Format error...");
            System.exit(1);
        }
        String pathname = args[0];
        String newPath = args[1];
        clear(pathname, newPath);
    }

    /**
     * Copies the distinct lines of {@code pathname} into {@code newPath}.
     *
     * <p>The input is read completely and closed before the output file is
     * opened, so passing the same path for both arguments de-duplicates the
     * file in place instead of truncating it before it has been read.
     *
     * <p>Failures are reported on standard output rather than thrown: an I/O
     * problem prints its message, and running out of heap prints the
     * "file too large" hint.
     *
     * @param pathname path of the UTF-8 text file to read
     * @param newPath  path of the file to (over)write with the distinct lines
     */
    public static void clear(String pathname, String newPath) {

        System.out.println("Start... ");

        try {
            Set<String> set = readUniqueLines(pathname);
            writeLines(set, newPath);

            System.out.println("");
            System.out.println("size = " + set.size());
            System.out.println("End...");
        } catch (IOException e) {
            // Missing file, permission problem, disk full, ... - say what really happened.
            System.out.println("");
            System.out.println("I/O error: " + e);
        } catch (OutOfMemoryError e) {
            // Deliberately caught at this CLI boundary: this is the actual
            // "file too large" condition the hint below is meant for.
            System.out.println("");
            System.out.println("文件太大了,建议先100MB大小..");
        }

    }

    /**
     * Reads all lines of the given file into a set, printing ".." to the
     * console once every {@link #PROGRESS_INTERVAL} lines as a progress mark.
     */
    private static Set<String> readUniqueLines(String pathname) throws IOException {
        Set<String> set = new HashSet<String>();
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(new File(pathname)), ENCODING));
        try {
            int x = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                set.add(line);
                if (x % PROGRESS_INTERVAL == 0) {
                    System.out.print("..");
                }
                x++;
            }
        } finally {
            reader.close();
        }
        return set;
    }

    /** Writes every element of the set to the given file, one per line, terminated by CRLF. */
    private static void writeLines(Set<String> set, String newPath) throws IOException {
        OutputStreamWriter out =
                new OutputStreamWriter(new FileOutputStream(new File(newPath)), ENCODING);
        try {
            for (String line : set) {
                out.write(line);
                out.write("\r\n");
            }
        } finally {
            // close() also flushes; without it the tail of the output could be lost.
            out.close();
        }
    }

    /** Prints the usage banner to standard output. */
    public static void print() {
        System.out.println("\t\tTo repeat \t\t");
        System.out.println();
        System.out.println("  format: java -Xmx1000m SpeedClear c:\\old.txt c:\\new.txt\t\t");
        System.out.println();
        System.out.println("\t\tAuthor:xxser  QQ:616100108");

    }

}

猜你喜欢

转载自blog.51cto.com/14311234/2424235