// 9.1.11、Scala 练习题:Scala 中类似连表联查的方式——没有 join,使用 Map 进行关联

// 1、统计班级人数
// 2、统计学生的总分

// 1、统计年级排名前十学生各科的分数 [学号,学生姓名,学生班级,科目名,分数]
// 2、统计总分大于年级平均分的学生 [学号,姓名,班级,总分]
// 3、统计每科都及格的学生 [学号,姓名,班级,科目,分数]
// 4、统计每个班级的前三名 [学号,姓名,班级,分数]
// 5、统计偏科最严重的前100名学生 [学号,姓名,班级,科目,分数]

package com.shujia

import org.junit.{
    
    Before, Test}
import scala.collection.mutable.ListBuffer
import scala.io.{
    
    BufferedSource, Source}

class Demo30propect {
    
    

  // Student records; default-initialised (null) until readFile() assigns them.
  var students: List[Students] = _
  // Score records; default-initialised (null) until readFile() assigns them.
  var score: List[Score] = _
  // Subject records; default-initialised (null) until readFile() assigns them.
  var subject: List[Subject] = _

  //    Warm-up exercises:
  //    1. Count the number of students in each class
  //    2. Compute each student's total score

  //    Main exercises:
  //    1. Per-subject scores of the ten students with the highest totals [student id, name, class, subject name, score]
  //    2. Students whose total is above the grade average [student id, name, class, total]
  //    3. Students who passed every subject [student id, name, class, subject, score]
  //    4. Top three students of every class [student id, name, class, score]
  //    5. The 100 students with the most uneven results across subjects [student id, name, class, subject, score]

  /**
    * Loads the three data files into `students`, `score` and `subject`.
    *
    * Annotated with JUnit's `@Before`, so it runs ahead of each test method.
    * Every file holds one comma-separated record per line:
    *   - students.txt: id, name, age, gender, class
    *   - score.txt:    student id, subject id, score
    *   - subject.txt:  subject id, subject name, subject score
    *
    * Blank lines are ignored. Each source is closed even when a line fails to
    * parse, so a malformed record cannot leak an open file handle.
    *
    * Throws `java.io.FileNotFoundException` when a data file is missing,
    * `NumberFormatException` when a numeric column is not an integer, and
    * `ArrayIndexOutOfBoundsException` when a line has too few columns.
    */
  @Before
  def readFile(): Unit = {
    // Directory holding the data files (environment specific)
    val dataDir = "D:\\Program Files\\IDEA\\IdeaProject\\bigdata\\data\\"

    // Reads one file, turns every non-blank line into a record, and always closes the source
    def load[A](fileName: String)(parse: Array[String] => A): List[A] = {
      val source: BufferedSource = Source.fromFile(dataDir + fileName)
      try {
        source.getLines()
          .filter(line => line.trim.nonEmpty)
          .map(line => parse(line.split(",")))
          .toList // forces the lazy iterator while the file is still open
      } finally {
        source.close()
      }
    }

    students = load("students.txt") { cols =>
      Students(cols(0).toInt, cols(1), cols(2).toInt, cols(3), cols(4))
    }

    score = load("score.txt") { cols =>
      Score(cols(0).toInt, cols(1).toInt, cols(2).toInt)
    }

    subject = load("subject.txt") { cols =>
      Subject(cols(0).toInt, cols(1), cols(2).toInt)
    }
  }

  /**
    * Prints the first 10 records of each loaded data set:
    * students first, then scores, then subjects.
    */
  @Test
  def printAll() = {
    // take(10) returns at most ten elements, so shorter lists are printed in full
    for (student <- students.take(10)) println(student)
    for (record <- score.take(10)) println(record)
    for (sub <- subject.take(10)) println(sub)
  }

  /**
    * Warm-up 1: prints one (class, number of students) pair per class.
    */
  @Test
  def clazz_sum() = {
    // Group the students by class, then reduce every group to its size
    val headcountByClazz = students.groupBy(_.clazz).map {
      case (clazz, members) => (clazz, members.size)
    }
    headcountByClazz.foreach(println)
  }

  /**
    * Warm-up 2: prints one (student id, total score) pair per student
    * that has at least one score record.
    */
  @Test
  def stu_cnt() = {
    // Collect each student's score records, then add up the score column
    val totalById = score.groupBy(_.id).map {
      case (studentId, records) => (studentId, records.map(_.score).sum)
    }
    totalById.foreach(println)
  }

  /**
    * Exercise 1: prints the per-subject scores of the ten students with the
    * highest total score.
    *
    * Output rows: (student id, student name, class, subject name, score).
    * Rows follow the order of the student list, not the ranking order.
    *
    * There is no join operator on plain collections, so the "join" is done by
    * building lookup maps keyed by id and indexing into them.
    */
  @Test
  def stu_score_top10() = {
    // Total per student, sorted highest first; only the ids of the best ten are kept.
    // groupBy yields a Map, which has to become a List before it can be sorted.
    val top10Ids: Set[Int] = score
      .groupBy(_.id)
      .map { case (id, records) => (id, records.map(_.score).sum) }
      .toList
      .sortBy(-_._2)
      .take(10)
      .map(_._1)
      .toSet

    // student id -> all score records of that student.
    // A plain toMap would keep only one record per id, hence the grouping.
    val scoresById: Map[Int, List[Score]] = score.groupBy(_.id)

    // subject id -> subject name
    val subjectNameById: Map[Int, String] =
      subject.map(sub => (sub.subject_id, sub.subject_name)).toMap

    students
      .filter(stu => top10Ids.contains(stu.id))
      .flatMap { stu =>
        // One student expands into one row per score record, hence flatMap
        scoresById(stu.id).map { sco =>
          (stu.id, stu.name, stu.clazz, subjectNameById(sco.subject_id), sco.score)
        }
      }
      .foreach(println)
  }

  /**
    * Exercise 2: prints the students whose total score is above the grade average.
    *
    * The grade average is the mean of the per-student totals, taken over the
    * students that have score records.
    * Output rows: (student id, name, class, total score).
    */
  @Test
  def score_sum_gather_avg() = {
    // student id -> total score
    val totalById: Map[Int, Int] = score.groupBy(_.id).map {
      case (id, records) => (id, records.map(_.score).sum)
    }

    // Dividing by a Double keeps the fractional part of the average
    val gradeAverage: Double = totalById.values.sum / totalById.size.toDouble

    // student id -> student record, used to attach name and class to each total
    val studentById: Map[Int, Students] = students.map(stu => (stu.id, stu)).toMap

    val aboveAverage = totalById
      .filter { case (_, total) => total > gradeAverage }
      .map { case (id, total) =>
        val stu: Students = studentById(id)
        (id, stu.name, stu.clazz, total)
      }

    aboveAverage.foreach(println)
  }

  /**
    * Exercise 3: prints all score rows of the students who passed every subject.
    *
    * A score passes when it is at least 60% of the subject's `subject_score`.
    * A student qualifies when exactly `requiredPasses` of their score records pass.
    * Output rows: (student id, name, class, subject name, score), in student-list order.
    */
  @Test
  def any_score_gather_avg() = {
    // Number of passing records that counts as "passed every subject"
    val requiredPasses = 6

    // subject id -> (subject name, pass mark)
    val subjectInfo: Map[Int, (String, Double)] = subject
      .map(sub => (sub.subject_id, (sub.subject_name, sub.subject_score * 0.6)))
      .toMap

    // student id -> all score records of that student
    val scoresById: Map[Int, List[Score]] = score.groupBy(_.id)

    // Ids of the students with the required number of passing records.
    // A Set keeps the membership test below constant-time per student.
    val allPassedIds: Set[Int] = scoresById.collect {
      case (id, records)
        if records.count(r => r.score >= subjectInfo(r.subject_id)._2) == requiredPasses => id
    }.toSet

    students
      .filter(stu => allPassedIds.contains(stu.id))
      .flatMap { stu =>
        // One output row per score record of the qualifying student
        scoresById(stu.id).map { r =>
          (stu.id, stu.name, stu.clazz, subjectInfo(r.subject_id)._1, r.score)
        }
      }
      .foreach(println)
  }

  /**
    * Exercise 4: prints the three students with the highest total score in every class.
    *
    * Output rows: (student id, name, class, total score).
    * Students without any score record cannot be ranked and are left out.
    */
  @Test
  def clazz_top3() = {
    // student id -> total score
    val totalById: Map[Int, Int] = score.groupBy(_.id).map {
      case (id, records) => (id, records.map(_.score).sum)
    }

    students
      // Attach the total to each student; ids missing from totalById are skipped
      .collect {
        case stu if totalById.contains(stu.id) =>
          (stu.id, stu.name, stu.clazz, totalById(stu.id))
      }
      .groupBy(_._3)
      // Within each class: sort by total, highest first, and keep at most three rows
      .flatMap { case (_, classmates) => classmates.sortBy(-_._4).take(3) }
      .foreach(println)
  }

/**
  * Exercise 5: prints all score rows of the 100 students whose results are
  * the most uneven across subjects.
  *
  * Unevenness is the population variance of a student's scores after each
  * score has been rescaled to a 0-100 range using the subject's `subject_score`.
  * Output rows: (student id, name, class, subject name, raw score), in student-list order.
  */
@Test
def unbalanceTop100Stu(): Unit = {
  // subject id -> full marks of that subject
  val fullMarksBySubject: Map[Int, Int] =
    subject.map(sub => (sub.subject_id, sub.subject_score)).toMap

  // Subjects have different full marks, so every score is converted to a percentage first.
  // Example: 90 points in a subject worth 150 becomes 90 * 100 / 150 = 60.
  val normalised: List[(Int, Float)] = score.map { sco =>
    (sco.id, sco.score * 100 / fullMarksBySubject(sco.subject_id).toFloat)
  }

  // student id -> variance of the student's normalised scores
  val varianceById: Map[Int, Double] = normalised.groupBy(_._1).map {
    case (id, rows) =>
      val marks: List[Float] = rows.map(_._2)
      val mean: Float = marks.sum / marks.size
      val variance: Double = marks.map(m => Math.pow(m - mean, 2)).sum / marks.size
      (id, variance)
  }

  // Ids of the 100 students with the largest variance
  val mostUnevenIds: Set[Int] = varianceById
    .toList
    .sortBy(-_._2)
    .map(_._1)
    .take(100)
    .toSet

  // student id -> all score records of that student
  val scoresById: Map[Int, List[Score]] = score.groupBy(_.id)

  // subject id -> subject name
  val subjectNameById: Map[Int, String] =
    subject.map(sub => (sub.subject_id, sub.subject_name)).toMap

  students
    .filter(stu => mostUnevenIds.contains(stu.id))
    .flatMap { stu =>
      // One output row per score record of the selected student
      scoresById(stu.id).map { sco =>
        (stu.id, stu.name, stu.clazz, subjectNameById(sco.subject_id), sco.score)
      }
    }
    .foreach(println)
}

  // The three case classes that model one parsed line of each data file.

  // One student: id, name, age, gender and the class the student belongs to.
  case class Students(id: Int, name: String, age: Int, gender: String, clazz: String)

  // One score record: `id` is the student id, `subject_id` identifies the subject.
  case class Score(id: Int, subject_id: Int, score: Int)

  // One subject; `subject_score` is used by the exercises as the subject's full marks.
  case class Subject(subject_id: Int, subject_name: String, subject_score: Int)

}

// 猜你喜欢
//
// 转载自 blog.csdn.net/nerer/article/details/121278529