使用python产生半月形正负样本数据集

《神经网络与机器学习》中有生成半月形数据用于训练与测试,参考了相应的matlab程序,改用python实现。

halfmoon 产生标准数据:返回 3×n_samp 的数组,三行依次为 x 坐标、y 坐标和类别标签;前一半数据为正例(标签 +1),后一半数据为负例(标签 -1)

halfmoon_shuffle 先调用 halfmoon 生成数据,再按列(即按样本)随机打乱次序

# -*- coding: utf-8 -*-

import numpy as np
import matplotlib.pyplot as plt


def halfmoon(rad, width, d, n_samp):
    """Generate the two-class "half moon" data set.

    Class +1 is sampled from the upper half of a ring centred at the
    origin; class -1 reuses the same radii and angles, mirrored to the
    lower half and then shifted by ``+rad`` in x and ``-d`` in y.

    Parameters
    ----------
    rad : float
        Central radius of each half moon.
    width : float
        Radial thickness of each half moon; radii are drawn uniformly
        from ``[rad - width/2, rad + width/2)``.
    d : float
        Vertical offset subtracted from the y coordinate of class -1.
    n_samp : int
        Total number of samples. An odd value is rounded up by one so
        that both classes have the same size.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(3, n_samp)``: row 0 is x, row 1 is y and row 2
        is the label. The first half of the columns is class +1, the
        second half class -1.

    Notes
    -----
    No check is made that ``rad >= width / 2``; a smaller ``rad`` makes
    some of the sampled radii negative.
    """
    n_samp = int(n_samp)
    # Round an odd request up so the two classes stay balanced.
    if n_samp % 2 != 0:
        n_samp += 1
    # Integer division: a float here is rejected by NumPy as a shape/index.
    half = n_samp // 2

    # Row 0 drives the radius, row 1 the angle; both are uniform in [0, 1).
    aa = np.random.random((2, half))
    radius = (rad - width / 2.0) + width * aa[0, :]
    theta = np.pi * aa[1, :]

    data = np.zeros((3, n_samp))

    # Class +1: upper half moon centred at the origin.
    data[0, :half] = radius * np.cos(theta)
    data[1, :half] = radius * np.sin(theta)
    data[2, :half] = 1

    # Class -1: same points mirrored about the x axis, then translated.
    data[0, half:] = radius * np.cos(-theta) + rad
    data[1, half:] = radius * np.sin(-theta) - d
    data[2, half:] = -1

    return data

def halfmoon_shuffle(rad, width, d, n_samp):
    """Generate half-moon data with the samples in random order.

    Calls ``halfmoon(rad, width, d, n_samp)`` and returns its columns
    permuted uniformly at random, so each column still holds one
    ``(x, y, label)`` sample.

    Parameters are passed straight through to ``halfmoon``.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as the ``halfmoon`` result, columns
        shuffled.
    """
    data = halfmoon(rad, width, d, n_samp)
    # Permute over the actual column count: halfmoon rounds an odd n_samp
    # up by one, so using n_samp here would silently drop the last sample.
    shuffle_seq = np.random.permutation(data.shape[1])
    return data[:, shuffle_seq]


if __name__ == "__main__":
    # Demo: generate 1000 samples, save them as CSV and plot both classes.
    dataNum = 1000
    data = halfmoon(10, 5, -2, dataNum)

    # Split on the real column count using integer division; a float
    # slice index (dataNum/2 under Python 3) raises TypeError.
    half = data.shape[1] // 2
    pos_data = data[:, :half]
    neg_data = data[:, half:]

    # One sample per row: x, y, label.
    np.savetxt('halfmoon.txt', data.T, fmt='%4f', delimiter=',')

    plt.figure()
    plt.scatter(pos_data[0, :], pos_data[1, :], c="b", s=10)
    plt.scatter(neg_data[0, :], neg_data[1, :], c="r", s=10)
    plt.show()
    

输出结果:

matlab代码:

function [data, data_shuffled] = halfmoon(rad,width,d,n_samp)
% A function to generate the halfmoon data
% where Input:
%         rad  - central radius of the half moon
%        width - width of the half moon
%           d  - distance between two half moon
%      n_samp  - total number of the samples (must be even)
%       Output:
%         data - 3-by-n_samp output data; rows are x, y and label,
%                first half of the columns is class +1, second half -1
%data_shuffled - the same columns in random order
% For example
% halfmoon(10,2,0,1000) will generate 1000 data of 
% two half moons with radius [9-11] and space 0.

if rad < width/2
    error('The radius should be at least larger than half the width');
end

if mod(n_samp,2)~=0
    error('Please make sure the number of samples is even');
end

% Row 1 drives the radius, row 2 the angle; both uniform in (0,1).
aa = rand(2,n_samp/2);
radius = (rad-width/2) + width*aa(1,:);
theta = pi*aa(2,:);

% Class 1: upper half moon centred at the origin.
x     = radius.*cos(theta);
y     = radius.*sin(theta);
label = 1*ones(1,length(x));  % label for Class 1

% Class 2: the same points mirrored about the x axis, then shifted.
x1    = radius.*cos(-theta) + rad;
y1    = radius.*sin(-theta) - d;
label1= -1*ones(1,length(x)); % label for Class 2

data  = [x, x1;
         y, y1;
         label, label1];

% Shuffle the columns with one indexing operation. This avoids growing
% data_shuffled inside a loop and keeps it defined when n_samp is 0.
shuffle_seq = randperm(size(data,2));
data_shuffled = data(:,shuffle_seq);


猜你喜欢

转载自blog.csdn.net/xfijun/article/details/80732551