更多代码请见:https://github.com/xubo245/SparkLearning
Spark 组件 MLlib 学习之分类篇
1.解释:

 支持向量机(Support Vector Machine,SVM)是Corinna Cortes和Vapnik等于1995年首先提出的,它在解决小样本、非线性及高维模式识别中表现出许多特有的优势,并能够推广应用到函数拟合等其他机器学习问题中。

SVM的概念在机器学习的书上讲得比较清楚,相关博客请见参考【4】

2.代码:

/**
  * @author xubo
  *         ref:Spark MlLib机器学习实战
  *         more code:https://github.com/xubo245/SparkLearning
  *         more blog:http://blog.csdn.net/xubo245
  */
package org.apache.spark.mllib.learning.regression

import java.text.SimpleDateFormat
import java.util.Date

import org.apache.spark.mllib.classification.{SVMModel, SVMWithSGD}
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Created by xubo on 2016/5/23.
  *
  * Example driver that trains a linear SVM with `SVMWithSGD` on a LIBSVM-format
  * data set, evaluates it on a held-out split using the area under the ROC
  * curve, prints the learned weights and a sample of raw scores, and finally
  * saves the model and loads it back.
  */
object SVMFromSparkLearning {

  /**
    * Program entry point.
    *
    * Runs the whole train / evaluate / save / load pipeline on a local Spark
    * context. The context is always stopped, even when a step fails.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    // Scala objects compile to a class whose simple name ends in '$'; strip it for a readable app name.
    val appName = getClass.getSimpleName.filter(_ != '$')
    val conf = new SparkConf().setMaster("local[4]").setAppName(appName)
    val sc = new SparkContext(conf)

    try {
      // Load training data in LIBSVM format.
      val data = MLUtils.loadLibSVMFile(sc, "file/data/mllib/input/regression/sample_libsvm_data.txt")

      // Split data into training (60%) and test (40%); the fixed seed keeps the split reproducible.
      val splits = data.randomSplit(Array(0.6, 0.4), seed = 11L)
      val training = splits(0).cache()
      val test = splits(1)

      // Run the training algorithm to build the model.
      val numIterations = 100
      val model = SVMWithSGD.train(training, numIterations)

      // Clear the default threshold so that predict returns raw scores instead of 0/1 labels.
      model.clearThreshold()

      // Compute raw scores on the test set. Cached because the RDD is consumed by
      // two separate actions below (the ROC computation and take).
      val scoreAndLabels = test.map { point =>
        (model.predict(point.features), point.label)
      }.cache()

      // Get evaluation metrics.
      val metrics = new BinaryClassificationMetrics(scoreAndLabels)
      val auROC = metrics.areaUnderROC()

      println(s"Area under ROC = $auROC")
      println(model.weights)
      println(s"model.weights.size${model.weights.size}")
      scoreAndLabels.take(10).foreach(println)

      // Save and load the model; a timestamped directory avoids clashing with earlier runs.
      val iString = new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date())
      val path = "file/data/mllib/output/regression/sample_libsvm_data" + iString + "/result"
      model.save(sc, path)
      val sameModel = SVMModel.load(sc, path)
      println(sameModel.weights)
    } finally {
      // Release the Spark context even if any step above throws.
      sc.stop()
    }
  }
}

3.结果:

Area under ROC = 1.0
[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.7337130460075109,-3.2361986136402683,0.3407411848600228,4.831019942362926,5.863207871433172,6.321238002341538,1.1593197179961523,0.8778352514732718,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.558082231618573,7.614056524699977,3.972551230278245,-0.5021633363428376,3.09207640817451,6.999028845845552,1.6006255300057055,-6.601633741695543,-7.915879914024223,-13.762363298328623,-11.899726942699795,-1.2377303735277725,19.824751776812263,3.1343657454150278,-1.0350594756177378,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.4677969763849728,7.252915503765597,6.623623028356155,5.038278768232783,-2.043737515450709,-9.093987699516363,-2.6043313081956296,-13.886356127055834,-37.12099067133248,-45.49281781238801,-35.479652617381014,-11.346890588798317,-16.51133656244354,-0.38760085927136384,-7.432579894479004,-6.760641638212063,-1.860486652376188,-1.1136715876899719,-0.18342826150187772,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.266639888626467,5.287217123128184,1.1076731646211178,-3.582849522355471,-15.782169088534003,-16.041900729713934,-26.46429405861315,-34.44342343394481,-33.36902939045033,-31.282077325493017,-26.959936076867262,-18.326592954652114,-27.292246485849912,-21.673902155596295,-26.79620287727465,-11.663453023746262,-6.593135235840258,-3.275504669676387,-1.9128947270910088,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.3251283408981753,-1.6169977019091155,-4.185542381575925,-12.181971702924658,-19.841308748111217,-35.219583208022605,-39.12144534521036,-33.52617124784443,-9.919953734486556,-15.222239137263907,-19.397333179179288,-44.66688011489746,-31.583507333269274,-34.452849
42390579,-34.30872721844003,-9.925870457369019,-3.672880845856094,-3.7733813794671973,-3.275504669676387,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.30134642961022773,-5.922112442774905,-10.028517652256483,-12.68875250405582,-20.064527343694465,-40.35785196894431,-46.134354700416175,-28.241294747423872,-7.798935816472758,18.369609414335795,10.070316299711335,-3.433508198459789,-20.75785383416912,-26.34269986178705,-41.265899136832665,-46.11364604795372,-32.348230354711866,-10.379291752480082,-5.9614184988110255,-3.275504669676387,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-2.5286896049901717,-9.577575654133753,-15.166179988849033,-24.854109896054446,-30.142124999260126,-30.082812592788194,-43.54551377047694,-14.27992660522959,15.333857117615883,25.899216238962413,29.048769222971202,22.324398491333234,10.067460813172092,-5.667714386789703,-42.012714201518875,-66.10201375226168,-57.32017109500564,-35.768510992866176,-10.18026851335422,-3.3410147630699156,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-3.4851369685356777,-14.215690266395512,-23.16834948873392,-24.226565190974913,-32.170343791803305,-43.00397963750271,-41.893451235149534,-16.19869892207988,20.190486942784222,48.59356132888608,55.68517319677962,38.270135535483455,31.581220644736185,2.9534139037985443,-34.946909109667146,-79.2273266379669,-64.2391975816933,-45.005434161353556,-17.85805145907566,-4.6381146122617665,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-8.241169748905788,-20.72413454452146,-16.40813692327119,-19.47975832514352,-35.76612620798708,-36.679211056944716,-47.0305847981241,-18.664714358726588,36.60117723090712,72.85132723879026,79.31026816707089,57.18414950655739,42.00246658342073,6.232899038257447,-39.09841810441359,-69.16555660488658,-63.37446434889879,-52.23774847199906,-25.31310008725912,-7.952925337974268,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-13.598734764165203,-12.548759528899271,-7.43802210897863,-33.224106790749225,-54.40011569749482,-53.8662889224984,-37.722884375790215,-12.875537271916356,54.123143904502875,94.05545310278474,99.560
90033027654,77.27581953122873,38.76587239094889,-6.666115936539715,-39.07021969989996,-65.9162559725676,-62.7062613962847,-57.47855594348125,-34.13075865802797,-12.591039950236027,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-8.172014341765763,-11.013338007701577,-36.52244514498772,-63.21120527772269,-70.67769670846462,-56.490856379845155,-35.87427173976792,-5.684823272422454,83.21281695866311,106.60186707374692,108.3553304482068,77.7585007989574,32.87635618150726,-22.57295552653642,-37.96965013088876,-63.269648199469074,-65.15633888920269,-57.75369833573408,-37.28834515959596,-16.560951609883805,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.24893835489540525,-6.6873423255364814,-33.64905723802673,-59.30993699767219,-72.3362451251333,-64.27850363772941,-43.26286567708573,-26.872240310025077,14.313692876474756,94.98025418648962,110.09919980768503,112.16400779780464,64.19259154325928,24.205435341865332,-20.92392382989277,-42.33262235089764,-61.60569182727345,-61.527079715201275,-52.02811617313971,-38.637853083502634,-16.456135460454174,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.4454686350759885,-26.885342328703786,-52.8797473872556,-65.71972569238706,-77.35431827907755,-60.701652538442794,-36.55463211358848,-10.442308886928325,39.753042245923645,98.61815135419039,122.56788444576748,100.92728980478287,54.09625506521514,6.787031358366499,-18.29850705053175,-46.48596227204729,-61.84152816349018,-55.9587217767514,-46.7480026456214,-36.96079469262831,-14.058466042251052,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.6944069899713939,-33.03018908901667,-55.853905627321794,-73.44991671282324,-75.14007712237637,-53.79688869476501,-29.636766251231936,4.074727809077426,49.42217859099944,92.65925976817768,122.16976451666912,85.39683689356868,37.15593349534437,4.35346957228914,-23.549139252208015,-52.194616108837735,-62.174794132527225,-57.609576130268294,-36.489122020194955,-31.62827309039519,-13.704711537925995,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-2.751423922528163,-34.78585959196323,-56.312476281076464,-77.53774654057948,-73.14857028321306
,-53.036971611400105,-19.954374447668553,21.199066222145586,50.0772795249347,90.83865948400327,104.32862475968555,74.4173452408135,31.348787442725538,-6.08188072169714,-33.97728886515344,-50.86587964121859,-60.08115149654165,-49.617344736257934,-32.88606688355094,-28.051421991108576,-10.455410905607033,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-2.6597097917772277,-31.418640791535925,-53.71827658269275,-79.08378474466674,-75.82138209366897,-53.849296769479814,-7.340486966433764,33.32132520447697,56.28164795044792,95.59090375218453,92.88140478179918,62.439642145214265,17.52823221556561,-25.155032456202637,-45.21233353973569,-52.87539886488263,-42.58715038876177,-34.46432267742638,-31.195906473997915,-21.539718707791913,-6.079336666919372,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.4412220546576096,-21.841065137402136,-49.74836492304496,-76.03651621143784,-84.0259028326108,-55.55083175529918,-6.482138224517242,29.433716849655152,52.81679966223357,85.10294861097411,85.38195918477615,47.32354758211306,0.29643071076518707,-32.81253054719098,-39.58111704015505,-37.21473660680637,-25.469102119335755,-31.479672533062296,-26.400567637591674,-14.700464957507611,-1.821180596340072,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.37995854168246074,-12.276591501947092,-38.82053495302669,-59.02487122263307,-75.78351737481397,-55.10853189981655,-24.36706289283027,5.140773723604572,13.066270316792902,34.826507047559225,35.523135498317984,-0.5852491597678443,-23.75047120487471,-31.10840067480707,-16.980264015624588,-34.21365989312294,-27.846167514485728,-23.161179767755886,-19.915068391632435,-5.03117517262293,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-3.9437076222903715,-4.461925585042228,-24.262244294138117,-50.83860614517269,-50.36245441616772,-37.042043473961705,-18.14327612977197,-24.232972763488355,-7.998090039227234,-8.692497029198638,-19.34054716726576,-30.17980766227568,-30.620912408226893,-29.771094718601955,-28.633909236040477,-21.00853436503383,-15.547147868598852,-8.398393973050263,-0.5109787284695166,0.0,0.0,0.0,0.0,0.0
,0.0,0.0,0.0,0.0,-1.074365531653855,9.384973570595232,5.495721776072003,-12.373427184205449,-23.505528429247413,-28.485384584789095,-17.69358386146934,-25.358264788512066,-28.115492912323283,-24.75176958078484,-22.946637453371846,-31.65314665667565,-24.857297616763667,-20.379637468455993,-13.383159494027211,-9.924226562848949,-2.7726796568609426,-1.8080785776613666,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.4389008059953043,2.317724971516749,4.082762189489032,7.394998235879728,-6.925508179945434,-18.049122038166452,-37.159907921660704,-39.437166212810205,-30.346768218827357,-21.278757238172442,-21.412678985787384,-15.961665520338554,-9.211970608219993,-4.43448073647133,1.8937942853434513,3.9486559253150038,-0.13102018678705546,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.5240807471482218,-3.1313824642106245,-3.183790538925448,1.2577937931557326,1.7556705029465436,-11.569082493297007,-20.622577400282516,-24.802121358789606,-18.303520094151658,-11.110511839542303,-10.416104849570909,-8.59492425323084,-4.782236817727525,-1.428120035978904]
model.weights.size692
(-893656.2467488575,0.0)
(553216.1154887225,1.0)
(-1030793.6628224523,0.0)
(-660018.4363039621,0.0)
(473494.1478776787,1.0)
(474484.8472541839,1.0)
(625896.1173054989,1.0)
(-1179877.5567907898,0.0)
(459173.33117972035,1.0)
(547268.1947242465,1.0)

后面的 model.save 和 SVMModel.load 运行很慢,没有等它们执行完,因此上面的结果中没有最后一行 sameModel.weights 的输出。

参考
【1】http://spark.apache.org/docs/1.5.2/mllib-guide.html
【2】http://spark.apache.org/docs/1.5.2/programming-guide.html
【3】https://github.com/xubo245/SparkLearning
【4】http://blog.csdn.net/macyang/article/details/38782399

Logo

CSDN联合极客时间,共同打造面向开发者的精品内容学习社区,助力成长!

更多推荐