Spark 创建 DataFrame 的方式有很多种,官方 API 也比较多,其中用得最多的可能是通过 textFile() 读取数据源。
但在业务上的个别场景中,textFile() 并不适用,于是改用下面这个 API:
/**
 * Applies a schema to a List of Java Beans.
 *
 * WARNING: Since there is no guaranteed ordering for fields in a Java Bean,
 * SELECT * queries will return the columns in an undefined order.
 * @since 1.6.0
 */
def createDataFrame(data: java.util.List[_], beanClass: Class[_]): DataFrame = {
  // Derive the schema (attribute sequence) from the bean class via reflection.
  val attrSeq = getSchema(beanClass)
  // Convert each bean in the Java list into an internal row matching that schema.
  // NOTE(review): quoted from Spark's SQLContext; `getSchema`, `beansToRows`,
  // `Dataset.ofRows` and `self` are Spark-internal and not visible here.
  val rows = SQLContext.beansToRows(data.asScala.iterator, beanClass, attrSeq)
  // Wrap the local rows in a LocalRelation and expose them as a DataFrame.
  Dataset.ofRows(self, LocalRelation(attrSeq, rows.toSeq))
}
样例部分代码:
// Build a one-element list of GciGri beans and register it as a temp view
// so it can be queried with Spark SQL.
ArrayList<GciGri> beans = new ArrayList<>();
GciGri bean = new GciGri();
bean.setGci(gci);
bean.setGri(gri);
beans.add(bean);
spark.createDataFrame(beans, GciGri.class).createOrReplaceTempView("testtesttest");
package cn.com.dtmobile.test; import java.io.Serializable; public class GciGri implements Serializable { private static final long serialVersionUID = 1L; private int Gci; private int Gri; public int getGci() { return Gci; } public void setGci(int gci) { Gci = gci; } public int getGri() { return Gri; } public void setGri(int gri) { Gri = gri; } }