1.创建一个Maven项目
pom.xml文件
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.aura.bigdata</groupId>
<!-- NOTE(review): "strom" looks like a typo for "storm"; left unchanged because
     the artifactId is part of the published coordinates and the jar name used
     later in this document. -->
<artifactId>strom-study</artifactId>
<version>1.0-SNAPSHOT</version>
<name>strom-study</name>
<!-- FIXME change it to the project's website -->
<url>http://www.example.com</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<!-- Java 7 source/target; Storm 1.0.2 itself requires Java 7+. -->
<maven.compiler.source>1.7</maven.compiler.source>
<maven.compiler.target>1.7</maven.compiler.target>
</properties>
<dependencies>
<!-- https://mvnrepository.com/artifact/org.apache.storm/storm-core -->
<!-- NOTE(review): for cluster submission (storm jar ...), storm-core is
     normally declared with <scope>provided</scope>, because the Storm
     cluster already ships it and bundling it causes classpath conflicts.
     Default (compile) scope is kept here so the local-mode example below
     runs from the IDE; switch to provided before building the cluster jar. -->
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-core</artifactId>
<version>1.0.2</version>
</dependency>
</dependencies>
<build>
<pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
<plugins>
<!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>3.1.0</version>
</plugin>
<!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.0</version>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.22.1</version>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-install-plugin</artifactId>
<version>2.5.2</version>
</plugin>
<plugin>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.8.2</version>
</plugin>
<!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
<plugin>
<artifactId>maven-site-plugin</artifactId>
<version>3.7.1</version>
</plugin>
<plugin>
<artifactId>maven-project-info-reports-plugin</artifactId>
<version>3.0.0</version>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>
手动开发一个本地项目:
需求:模拟天猫双十一,实时订单总和统计过程
package com.aura.bigdata.strom.local;
import com.aura.bigdata.strom.util.MyStrom;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import java.util.Map;
public class RealTimeOrderSumTopology {
    /**
     * Simulates real-time aggregation of order totals (a Tmall "Double 11" demo).
     * In Storm the top-level core abstraction is the Topology: a DAG wiring
     * spouts (data sources) to bolts (processing units).
     */

    /**
     * The spout has a single responsibility: producing data.
     */
    static class NumSpout extends BaseRichSpout {

        private Map conf;
        private TopologyContext context;
        private SpoutOutputCollector collector;

        /**
         * Comparable to the setup() method in MapReduce: used purely for
         * initialization. Storm calls it once before nextTuple().
         *
         * @param conf      topology configuration
         * @param context   runtime context of this task
         * @param collector collector used to emit tuples downstream
         */
        @Override
        public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
            this.conf = conf;
            this.context = context;
            this.collector = collector;
        }

        // Monotonically increasing order amount; incremented after each emit.
        long num = 0;

        /**
         * Comparable to the map() method: produces one record per invocation.
         * Storm calls this method continuously.
         */
        @Override
        public void nextTuple() {
            System.out.println(MyStrom.dateFormat() + "--->马云商城订单交易金额为:" + num + "$");
            this.collector.emit(new Values(num++));
            MyStrom.sleep(1000); // throttle to roughly one tuple per second
        }

        /**
         * Declares field names (aliases) for the data emitted from nextTuple(),
         * so downstream components can address it by name. Note that the Fields
         * declared here correspond one-to-one with the Values emitted through
         * the collector.
         *
         * @param declarer declarer for the output fields
         */
        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("num"));
        }
    }

    static class NumBolt extends BaseRichBolt {

        private Map conf;
        private TopologyContext context;
        private OutputCollector collector;

        /**
         * One-time initialization, analogous to the spout's open().
         *
         * @param stormConf topology configuration
         * @param context   runtime context of this task
         * @param collector collector used to emit tuples downstream
         */
        @Override
        public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
            // BUG FIX: was "this.conf = conf;" — a self-assignment (the
            // parameter is named stormConf), which left the field null.
            this.conf = stormConf;
            this.context = context;
            this.collector = collector;
        }

        // Running total of all order amounts received so far.
        long sum = 0;

        /**
         * Core processing unit: receives tuples from the upstream component
         * (emitted by the spout's nextTuple() or an upstream bolt's execute()).
         *
         * @param input tuple carrying the "num" field declared by NumSpout
         */
        @Override
        public void execute(Tuple input) {
            long num = input.getLongByField("num");
            sum += num;
            // BUG FIX: print the accumulated total (sum) — the point of this
            // topology — rather than the single incoming value (num).
            System.out.println("======>>" + MyStrom.dateFormat() + "--->马云商城订单交易金额为:" + sum + "$");
            MyStrom.sleep(1000);
        }

        /**
         * Not strictly required: if a bolt has no downstream consumer this
         * method need not declare anything. Kept here so a downstream bolt
         * could consume the "num" field.
         *
         * @param declarer declarer for the output fields
         */
        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("num"));
        }
    }

    public static void main(String[] args) {
        // Step 1: build a TopologyBuilder to organize the Storm job into a DAG.
        TopologyBuilder tb = new TopologyBuilder();
        tb.setSpout("id_spout", new NumSpout());
        tb.setBolt("id_bolt", new NumBolt()).shuffleGrouping("id_spout"); // stream grouping
        // Step 2: create the Topology from the builder.
        StormTopology stormTopology = tb.createTopology();
        // Step 3: submit the job; this example uses local (in-process) mode.
        LocalCluster localCluster = new LocalCluster();
        // Topology name
        String topologyName = RealTimeOrderSumTopology.class.getSimpleName();
        // Storm configuration
        Config config = new Config();
        localCluster.submitTopology(topologyName, config, stormTopology);
    }
}
手动开发一个集群部署的项目:
开启集群:
//开启主节点,运行storm ui界面
[hdp01@hdp01]nohup apps/apache-storm-1.0.2/bin/storm nimbus >/dev/null 2>&1 &
[hdp01@hdp01]nohup apps/apache-storm-1.0.2/bin/storm ui >/dev/null 2>&1 &
//在hdp02上开启备用主节点(nimbus)、从节点(supervisor)和日志服务(logviewer)
[hdp01@hdp02]nohup apps/apache-storm-1.0.2/bin/storm nimbus >/dev/null 2>&1 &
[hdp01@hdp02]nohup apps/apache-storm-1.0.2/bin/storm supervisor >/dev/null 2>&1 &
[hdp01@hdp02]nohup apps/apache-storm-1.0.2/bin/storm logviewer >/dev/null 2>&1 &
//开启从节点,开启日志文件
[hdp01@hdp03]nohup apps/apache-storm-1.0.2/bin/storm supervisor >/dev/null 2>&1 &
[hdp01@hdp03]nohup apps/apache-storm-1.0.2/bin/storm logviewer >/dev/null 2>&1 &
代码:
package com.aura.bigdata.strom.remote;
import com.aura.bigdata.strom.util.MyStrom;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import java.util.Map;
public class RealTimeOrderSumTopology {
    /**
     * Simulates real-time aggregation of order totals (a Tmall "Double 11" demo),
     * runnable in both local mode and on a Storm cluster.
     * In Storm the top-level core abstraction is the Topology: a DAG wiring
     * spouts (data sources) to bolts (processing units).
     */

    /**
     * The spout has a single responsibility: producing data.
     */
    static class NumSpout extends BaseRichSpout {

        private Map conf;
        private TopologyContext context;
        private SpoutOutputCollector collector;

        /**
         * Comparable to the setup() method in MapReduce: used purely for
         * initialization. Storm calls it once before nextTuple().
         *
         * @param conf      topology configuration
         * @param context   runtime context of this task
         * @param collector collector used to emit tuples downstream
         */
        @Override
        public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
            this.conf = conf;
            this.context = context;
            this.collector = collector;
        }

        // Monotonically increasing order amount; incremented after each emit.
        long num = 0;

        /**
         * Comparable to the map() method: produces one record per invocation.
         * Storm calls this method continuously.
         */
        @Override
        public void nextTuple() {
            System.out.println(MyStrom.dateFormat() + "--->马云商城订单交易金额为:" + num + "$");
            this.collector.emit(new Values(num++));
            MyStrom.sleep(1000); // throttle to roughly one tuple per second
        }

        /**
         * Declares field names (aliases) for the data emitted from nextTuple(),
         * so downstream components can address it by name. Note that the Fields
         * declared here correspond one-to-one with the Values emitted through
         * the collector.
         *
         * @param declarer declarer for the output fields
         */
        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("num"));
        }
    }

    static class NumBolt extends BaseRichBolt {

        private Map conf;
        private TopologyContext context;
        private OutputCollector collector;

        /**
         * One-time initialization, analogous to the spout's open().
         *
         * @param stormConf topology configuration
         * @param context   runtime context of this task
         * @param collector collector used to emit tuples downstream
         */
        @Override
        public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
            // BUG FIX: was "this.conf = conf;" — a self-assignment (the
            // parameter is named stormConf), which left the field null.
            this.conf = stormConf;
            this.context = context;
            this.collector = collector;
        }

        // Running total of all order amounts received so far.
        long sum = 0;

        /**
         * Core processing unit: receives tuples from the upstream component
         * (emitted by the spout's nextTuple() or an upstream bolt's execute()).
         *
         * @param input tuple carrying the "num" field declared by NumSpout
         */
        @Override
        public void execute(Tuple input) {
            long num = input.getLongByField("num");
            sum += num;
            // BUG FIX: print the accumulated total (sum) — the point of this
            // topology — rather than the single incoming value (num).
            System.out.println("======>>" + MyStrom.dateFormat() + "--->马云商城订单交易金额为:" + sum + "$");
            MyStrom.sleep(1000);
        }

        /**
         * Not strictly required: if a bolt has no downstream consumer this
         * method need not declare anything. Kept here so a downstream bolt
         * could consume the "num" field.
         *
         * @param declarer declarer for the output fields
         */
        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("num"));
        }
    }

    public static void main(String[] args) throws InvalidTopologyException, AuthorizationException, AlreadyAliveException {
        // Step 1: build a TopologyBuilder to organize the Storm job into a DAG.
        TopologyBuilder tb = new TopologyBuilder();
        tb.setSpout("id_spout", new NumSpout());
        tb.setBolt("id_bolt", new NumBolt()).shuffleGrouping("id_spout"); // stream grouping
        // Step 2: create the Topology from the builder.
        StormTopology stormTopology = tb.createTopology();
        // Step 3: submit the job, either to a cluster or in local mode.
        // Topology name
        String topologyName = RealTimeOrderSumTopology.class.getSimpleName();
        // Storm configuration
        Config config = new Config();
        if (args == null || args.length < 1) {
            // No argument supplied: run in local (in-process) mode.
            LocalCluster localCluster = new LocalCluster();
            localCluster.submitTopology(topologyName, config, stormTopology);
        } else {
            // Any argument present: submit to the Storm cluster.
            StormSubmitter.submitTopology(topologyName, config, stormTopology);
        }
    }
}
打成jar包,上传到虚拟机上。
提交后可在 Storm UI 界面中看到正在运行的项目。提交命令(在 Storm 安装目录下执行):
/bin/storm jar ~/jars/storm/storm-jar.jar com.aura.bigdata.strom.remote.RealTimeOrderSumTopology 1234567890
结果显示: