list的插入排序

公司对入职同学的一道考试题：有大约40万个数字(数字范围：0～200000000000000），数字没有重复，这些数字在一个txt文件中，每行为一个数字。
 * 这个txt文件放在一个WEB服务器上， 可以通过http://ip:8888/numbers.txt 下载。
 * 求这些数字中，最大的100个数字之和。
 * 注意：运行堆内存只有4M （ java -Xmx4m )
 * 不允许写临时文件
 * 要求耗时：小于2秒 (CPU:Intel i5-4590 3.3GHz)

1. 简单的思路是：从输入流中按行读取所有数字放到一个集合中，然后排序，并取前100个的和，只是这样会比较慢。

读取流的时间应该是省不了，但是可以改进下排序求和的过程：在插入的时候直接比较排序

1.1 定义一个LinkedList，第一个元素直接插入，然后
1.2 如果待插入的数值大于第一个元素，就将其插到列表首节点。
1.3. 否则如果待插入的元素小于末尾元素，就将其插到列表末尾节点。
1.4. 否则就挨个比较，放入比前一个节点小比后一个节点大的位置。
1.5. 放完之后如果列表长度大于100，就去除最后一个节点（index=100）。

这样可以预估一下最坏的情况下需要的比较次数： 0 + 1 + 2 + 3 + ... + 99 + (400000 - 100) * 100 次

/**
 * Baseline solution: keeps the largest numbers seen so far in a descending
 * {@link LinkedList} that is capped at {@link #LIMIT} entries, then sums them.
 *
 * <p>The middle insertion deliberately stays index based ({@code get(i)} followed by
 * {@code add(i, e)}); that double walk is the cost the later variants improve on.
 */
public class Demo {

    /** How many of the largest numbers are kept and summed. */
    private static final int LIMIT = 100;

    /**
     * Read buffer size in chars. The buffer is a {@code char[]} (2 bytes per char), so it has
     * to stay far below the 4 MB heap the task allows; a 4M-char buffer alone needs about 8 MB.
     */
    private static final int BUFFER_CHARS = 64 * 1024;

    /** Largest values seen so far, in descending order, never longer than LIMIT between calls. */
    private static LinkedList<Long> list = new LinkedList<Long>();

    /**
     * Downloads the number file and returns the sum of its 100 largest numbers.
     *
     * @return the sum of the (at most) 100 largest valid numbers in the file
     * @throws IOException if the server does not answer with HTTP 200 or reading fails
     */
    public static long p() throws IOException {
        String url = "http://localhost:8888/numbers.txt";
        URLConnection urlconn = new URL(url).openConnection();
        urlconn.connect();
        HttpURLConnection httpconn = (HttpURLConnection) urlconn;
        // Query the status once and reuse it for both the check and the message.
        int resp = httpconn.getResponseCode();
        if (resp != HttpURLConnection.HTTP_OK) {
            throw new IOException("连接失败["+ resp +"],url:" + url);
        }
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(urlconn.getInputStream(), "UTF-8"), BUFFER_CHARS)) {
            return sumLargest(br);
        }
    }

    /**
     * Reads one number per line and returns the sum of the 100 largest ones.
     * Lines that are not valid {@code long} values are reported on stdout and skipped.
     * The shared list is reset first, so every call starts from an empty state.
     *
     * @param reader source of the lines; not closed by this method
     * @return the sum of the (at most) 100 largest valid numbers
     * @throws IOException if reading a line fails
     */
    static long sumLargest(BufferedReader reader) throws IOException {
        list.clear();
        String line;
        while ((line = reader.readLine()) != null) {
            try {
                // Must go through the sorted insert; a plain list.add would just keep
                // the first 100 lines of the file.
                add(Long.parseLong(line.trim()));
            } catch (NumberFormatException ex) {
                System.out.println("非法数据：" + line);
            }
        }
        return sum();
    }

    /** Returns the sum of everything currently held in the list. */
    private static long sum() {
        long result = 0;
        for (long value : list) {
            result = result + value;
        }
        return result;
    }

    /**
     * Inserts {@code e} at its position in the descending list and drops the smallest
     * entry when the list would grow beyond LIMIT.
     *
     * @param e the value to insert
     */
    private static void add(long e) {
        if (list.isEmpty() || e >= list.getFirst()) {
            // Empty list or new maximum: becomes the head.
            list.addFirst(e);
        } else if (e < list.getLast()) {
            // Smaller than everything kept: only relevant while the list is not full.
            if (list.size() < LIMIT) {
                list.addLast(e);
            }
        } else {
            // first > e >= last, so a position with e >= list.get(i) always exists.
            for (int i = 1; i < list.size(); i++) {
                if (e >= list.get(i)) {
                    list.add(i, e);
                    break;
                }
            }
        }
        if (list.size() > LIMIT) {
            list.removeLast();
        }
    }

    /** Times the sorted insertion on 400000 random numbers and prints the result. */
    public static void test() {
        SecureRandom random = new SecureRandom();
        List<Long> alist = new ArrayList<Long>(400000);
        for (int i = 0; i < 400000; i++) {
            // nextLong() % N may be negative; abs() keeps the data non-negative as the task states.
            long value = Math.abs(random.nextLong() % 200000000000000L);
            alist.add(value);
        }

        list.clear();
        long t = System.currentTimeMillis();
        for (long l : alist) {
            add(l);
        }
        System.out.println("插入耗时:" + (System.currentTimeMillis() - t) + "ms");
        System.out.println("size:" + list.size());
        long result = sum();
        System.out.println("result:" + result);
        System.out.println("总耗时:" + (System.currentTimeMillis() - t) + "ms");
        System.out.println(list);
    }

    public static void main(String[] args) throws IOException {
        test();
    }
}

因为这里主要的操作是插入和删除，所以定义了LinkedList实例。把它赋给LinkedList类型的引用，是为了可以直接调用addFirst(E e)、addLast(E e)和removeLast()方法；如果赋给List类型的引用，调用List的add(int index, E e)和remove(int index)效果其实也一样。LinkedList是链表实现的，本身不具有index下标属性，为了实现List的按下标访问方法，它会先判断index靠前还是靠后，再决定从头向后还是从尾向前沿着节点引用逐个计数，直到走到第index个节点。

这样实现后，在排序上所花费的时间会有明显减少，可以单独测一下排序花费的时间(ms)：16 16 15 16 16 16 15 16 16 15

2. LinkedList的add()方法在插入中间节点时，其实寻找了两次元素的下标，get(i)和add(i,e)，如果在比较得出元素应该插入的index时就直接将元素插入是不是可以更节省时间呢。

由于LinkedList的一些方法和元素不可继承，就模拟写了个SortLongLinkedList，添加了一个linkLongWithSort(long e)方法，对于待插入的元素，当找到位置时直接插入，而不是先记下找到的位置index，然后再调用add把它插入到index位置

import java.util.Iterator;
import java.util.NoSuchElementException;
 
/**
 * Minimal doubly linked list of {@code Long} values, modelled on {@code java.util.LinkedList},
 * with an extra {@link #linkLongWithSort(long)} that inserts a value at its position in a
 * descending list during a single walk instead of locating an index first.
 */
public class SortLongLinkedList implements Iterable<Long> {

    private int size = 0;

    /** Head node, or null when the list is empty. */
    private Node<Long> first;

    /** Tail node, or null when the list is empty. */
    private Node<Long> last;

    /**
     * Inserts {@code e} in front of the first element that is not greater than it, which keeps
     * a descending list sorted. The typical caller uses it for {@code first > e >= last}, but
     * the empty list and values beyond either end are handled as well.
     *
     * @param e the value to insert
     */
    public void linkLongWithSort(long e) {
        Node<Long> cursor = first;
        while (cursor != null && e < cursor.item) {
            cursor = cursor.next;
        }
        if (cursor == null) {
            // Smaller than every element (or the list is empty): goes to the end.
            linkLast(e);
            return;
        }
        if (cursor == first) {
            linkFirst(e);
            return;
        }
        Node<Long> before = cursor.prev;
        Node<Long> inserted = new Node<Long>(before, e, cursor);
        before.next = inserted;
        cursor.prev = inserted;
        size++;
    }

    /**
     * @return the first element
     * @throws NoSuchElementException if the list is empty
     */
    public Long getFirst() {
        final Node<Long> f = first;
        if (f == null) {
            throw new NoSuchElementException();
        }
        return f.item;
    }

    /**
     * @return the last element
     * @throws NoSuchElementException if the list is empty
     */
    public Long getLast() {
        final Node<Long> l = last;
        if (l == null) {
            throw new NoSuchElementException();
        }
        return l.item;
    }

    /** Links {@code e} as the new first element. */
    public void linkFirst(Long e) {
        final Node<Long> f = first;
        final Node<Long> newNode = new Node<Long>(null, e, f);
        first = newNode;
        if (f == null) {
            last = newNode;
        } else {
            f.prev = newNode;
        }
        size++;
    }

    /** Links {@code e} as the new last element. */
    public void linkLast(Long e) {
        final Node<Long> l = last;
        final Node<Long> newNode = new Node<Long>(l, e, null);
        last = newNode;
        if (l == null) {
            first = newNode;
        } else {
            l.next = newNode;
        }
        size++;
    }

    /**
     * Removes and returns the last element.
     *
     * @return the removed element
     * @throws NoSuchElementException if the list is empty
     */
    public Long unlinkLast() {
        final Node<Long> l = last;
        if (l == null) {
            throw new NoSuchElementException();
        }
        final Long element = l.item;
        final Node<Long> prev = l.prev;
        l.item = null;
        l.prev = null; // help GC

        last = prev;
        if (prev == null) {
            first = null;
        } else {
            prev.next = null;
        }
        size--;
        return element;
    }

    public boolean isEmpty() {
        return size == 0;
    }

    public int size() {
        return size;
    }

    private static class Node<E> {
        E item;
        Node<E> next;
        Node<E> prev;

        Node(Node<E> prev, E element, Node<E> next) {
            this.item = element;
            this.next = next;
            this.prev = prev;
        }
    }

    /**
     * Returns an iterator over every element from first to last, including both ends.
     * Removal through the iterator is not supported.
     */
    @Override
    public Iterator<Long> iterator() {
        return new Iterator<Long>() {
            /** Node whose item the following next() call returns; null once exhausted. */
            private Node<Long> cursor = first;

            @Override
            public boolean hasNext() {
                return cursor != null;
            }

            @Override
            public Long next() {
                if (cursor == null) {
                    throw new NoSuchElementException();
                }
                Long value = cursor.item;
                cursor = cursor.next;
                return value;
            }

            @Override
            public void remove() {
                throw new UnsupportedOperationException("remove");
            }
        };
    }

    /** Returns the elements joined by commas, or an empty string for an empty list. */
    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder();
        for (Node<Long> node = first; node != null; node = node.next) {
            if (node != first) {
                builder.append(",");
            }
            builder.append(node.item);
        }
        return builder.toString();
    }
}

然后将使用的LinkedList改为SortLongLinkedList：

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.security.SecureRandom;
import java.util.ArrayList;
import java.util.List;
 
/**
 * Same approach as the LinkedList based demo, but backed by {@code SortLongLinkedList}, whose
 * {@code linkLongWithSort} inserts a middle element during a single walk of the list.
 */
public class Demo2 {

    /** How many of the largest numbers are kept and summed. */
    private static final int LIMIT = 100;

    /**
     * Read buffer size in chars. The buffer is a {@code char[]} (2 bytes per char), so it has
     * to stay far below the 4 MB heap the task allows; a 4M-char buffer alone needs about 8 MB.
     */
    private static final int BUFFER_CHARS = 64 * 1024;

    /** Largest values seen so far, in descending order, never longer than LIMIT between calls. */
    private static SortLongLinkedList list = new SortLongLinkedList();

    /**
     * Downloads the number file and returns the sum of its 100 largest numbers.
     * Lines that are not valid {@code long} values are reported on stdout and skipped.
     *
     * @return the sum over the elements kept in the list after reading the whole file
     * @throws IOException if the server does not answer with HTTP 200 or reading fails
     */
    public static long p() throws IOException {
        String url = "http://localhost:8888/numbers.txt";
        URLConnection urlconn = new URL(url).openConnection();
        urlconn.connect();
        HttpURLConnection httpconn = (HttpURLConnection) urlconn;
        // Query the status once and reuse it for both the check and the message.
        int resp = httpconn.getResponseCode();
        if (resp != HttpURLConnection.HTTP_OK) {
            throw new IOException("连接失败["+ resp +"],url:" + url);
        }

        // Start every run from an empty list so earlier runs cannot leak into the result.
        list = new SortLongLinkedList();
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(urlconn.getInputStream(), "UTF-8"), BUFFER_CHARS)) {
            String line;
            while ((line = br.readLine()) != null) {
                try {
                    add(Long.parseLong(line.trim()));
                } catch (NumberFormatException ex) {
                    System.out.println("非法数据：" + line);
                }
            }
        }
        return sum();
    }

    /** Returns the sum of the values the list's iterator yields. */
    private static long sum() {
        long result = 0;
        for (long value : list) {
            result = result + value;
        }
        return result;
    }

    /**
     * Inserts {@code e} at its position in the descending list and drops the smallest
     * entry when the list would grow beyond LIMIT.
     *
     * @param e the value to insert
     */
    private static void add(long e) {
        if (list.isEmpty() || e >= list.getFirst()) {
            // Empty list or new maximum: becomes the head.
            list.linkFirst(e);
        } else if (e < list.getLast()) {
            // Smaller than everything kept: only relevant while the list is not full.
            if (list.size() < LIMIT) {
                list.linkLast(e);
            }
        } else {
            // first > e >= last: insert somewhere in the middle.
            list.linkLongWithSort(e);
        }
        if (list.size() > LIMIT) {
            list.unlinkLast();
        }
    }

    /** Times the sorted insertion on 400000 random numbers and prints the result. */
    public static void test() {
        SecureRandom random = new SecureRandom();
        List<Long> alist = new ArrayList<Long>(400000);
        for (int i = 0; i < 400000; i++) {
            // nextLong() % N may be negative; abs() keeps the data non-negative as the task states.
            long value = Math.abs(random.nextLong() % 200000000000000L);
            alist.add(value);
        }

        list = new SortLongLinkedList();
        long t = System.currentTimeMillis();
        for (long l : alist) {
            add(l);
        }
        System.out.println("插入耗时:" + (System.currentTimeMillis() - t) + "ms");
        System.out.println("size:" + list.size());
        long result = sum();
        System.out.println("result:" + result);
        System.out.println("总耗时:" + (System.currentTimeMillis() - t) + "ms");
        System.out.println(list);
    }

    public static void main(String[] args) throws IOException {
        test();
    }
}

把两个demo都测10遍，记录插入排序得出400000个长整型数中前100个元素的耗时(ms)，可以看出一些非常微小的提升。用SortLongLinkedList测的结果：14 14 14 15 15 15 15 14 14 15

3. 在插入中间节点时省去了一次获取下标为index的元素的动作，但是在比较的时候还是顺序比较的，如果将顺序比较换成二分法排序插入，应该是可以更快的。

但是由于LinkedList不能直接通过下标获取元素，如果在LinkedList上对元素个数用二分法排序反而适得其反，因为获取中间下标元素本身就增加了寻找动作。

所以直接用ArrayList反而比较合适，只是ArrayList中掺和了数组拷贝的成本，为了体现出数组拷贝的开销，这里直接定义了一个固定长度的数组，本质上与使用ArrayList是相同的动作。

import java.security.SecureRandom;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
 
/**
 * Keeps the 100 largest numbers in a fixed-size array sorted in descending order and finds
 * the insertion position of a middle element with a binary search.
 */
public class Demo3 {

    /** Number of largest values that are kept. */
    private static final int CAPACITY = 100;

    /** Kept values in descending order; only the first {@code inserted} slots are filled. */
    static Long[] array = new Long[CAPACITY];

    /** Number of filled slots in {@code array}, at most CAPACITY. */
    static int inserted = 0;

    /** Times the sorted insertion on 400000 random numbers and prints the result. */
    public static void test() {
        SecureRandom random = new SecureRandom();
        List<Long> alist = new ArrayList<Long>(400000);
        for (int i = 0; i < 400000; i++) {
            // nextLong() % N may be negative; abs() keeps the data non-negative as the task states.
            long value = Math.abs(random.nextLong() % 200000000000000L);
            alist.add(value);
        }

        // Start from an empty array so repeated runs do not mix their data.
        Arrays.fill(array, null);
        inserted = 0;

        long t = System.currentTimeMillis();
        for (long l : alist) {
            add(l);
        }
        System.out.println("插入耗时:" + (System.currentTimeMillis() - t) + "ms");

        long result = 0;
        // Only the filled slots are summed; unfilled slots are null and cannot be unboxed.
        for (int i = 0; i < inserted; i++) {
            result = result + array[i];
        }
        System.out.println("result:" + result);
        System.out.println("总耗时:" + (System.currentTimeMillis() - t) + "ms");
        System.out.println(Arrays.asList(array));
    }

    /**
     * Adds {@code e} to the kept values if it belongs to the CAPACITY largest seen so far.
     *
     * @param e the value to add
     */
    private static void add(long e) {
        if (inserted == 0 || e >= array[0]) {
            // Empty array or new maximum: goes to the front.
            insertAt(0, e);
            return;
        }
        if (e < array[inserted - 1]) {
            // Smaller than everything kept: only relevant while the array is not full.
            if (inserted < CAPACITY) {
                insertAt(inserted, e);
            }
            return;
        }
        middleAdd(e);
    }

    /**
     * Binary-searches the descending array for the first slot whose value is not greater than
     * {@code e} and inserts {@code e} there. A value equal to an existing element is placed in
     * front of it; each step shrinks the search range, so the loop always terminates.
     *
     * @param e the value to insert
     */
    private static void middleAdd(long e) {
        int low = 0;
        int high = inserted; // search range is [low, high)
        while (low < high) {
            int middle = (low + high) >>> 1;
            if (array[middle] > e) {
                low = middle + 1;
            } else {
                high = middle;
            }
        }
        insertAt(low, e);
    }

    /**
     * Writes {@code e} to slot {@code index}, shifting the following values one slot to the
     * right. When the array is already full, the last (smallest) value is dropped.
     *
     * @param index target slot, between 0 and {@code inserted}
     * @param e the value to store
     */
    private static void insertAt(int index, long e) {
        if (index >= CAPACITY) {
            return;
        }
        // With a full array the last value must not be shifted past the end.
        int shift = Math.min(inserted, CAPACITY - 1) - index;
        if (shift > 0) {
            System.arraycopy(array, index, array, index + 1, shift);
        }
        array[index] = e;
        if (inserted < CAPACITY) {
            inserted++;
        }
    }

    public static void main(String[] args) {
        test();
    }
}

同样记录了插入排序得出400000个长整型数中前100个元素的耗时(ms)，测了10组数据：11 11 11 11 13 12 11 15 12 11

这次提升比上次明显，除了二分法插入排序，还有一个原因是少了很多java对象实例的创建：在使用LinkedList的时候，它其实会把每个元素包装成一个Node节点实例对象，这些new动作积少成多，开销也是可观的。这里其实也还有把long自动装箱为Long实例的过程，如果将上面的数组Long[] array改为long[] array，将看到耗时会再次降低，测的时候发现最少只要9ms就可以了。

猜你喜欢