配置 Spark JMX

2019/12/30 Blogs

1. 配置 config

为了方便管理,将配置统一写到 spark-defaults.conf 中。

添加:

spark.metrics.conf=/usr/hdp/2.6.2.0-205/spark2/conf/metrics.properties

因此监控相关的配置主要集中在 metrics.properties 中,内容如下:

*.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink
master.source.jvm.class=org.apache.spark.metrics.source.JvmSource
worker.source.jvm.class=org.apache.spark.metrics.source.JvmSource
driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource
executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource

将上述配置文件分发到各节点,重启 Spark 集群后提交任务。

2. 查看 Metrics 内容(通过 /metrics/json/ 接口)

对于 Driver 所在节点(8081 端口返回的是该节点 Worker 进程的指标,4040 端口返回的是 Driver 应用本身的指标):

http://192.168.33.123:8081/metrics/json/

{"version":"3.0.0","gauges":{"jvm.PS-MarkSweep.count":{"value":2},"jvm.PS-MarkSweep.time":{"value":113},"jvm.PS-Scavenge.count":{"value":8},"jvm.PS-Scavenge.time":{"value":85},"jvm.heap.committed":{"value":286785536},"jvm.heap.init":{"value":130023424},"jvm.heap.max":{"value":954728448},"jvm.heap.usage":{"value":0.1092856426542765},"jvm.heap.used":{"value":104338112},"jvm.non-heap.committed":{"value":52314112},"jvm.non-heap.init":{"value":2555904},"jvm.non-heap.max":{"value":-1},"jvm.non-heap.usage":{"value":-5.11092E7},"jvm.non-heap.used":{"value":51109200},"jvm.pools.Code-Cache.committed":{"value":6619136},"jvm.pools.Code-Cache.init":{"value":2555904},"jvm.pools.Code-Cache.max":{"value":251658240},"jvm.pools.Code-Cache.usage":{"value":0.024344889322916667},"jvm.pools.Code-Cache.used":{"value":6126592},"jvm.pools.Compressed-Class-Space.committed":{"value":5414912},"jvm.pools.Compressed-Class-Space.init":{"value":0},"jvm.pools.Compressed-Class-Space.max":{"value":1073741824},"jvm.pools.Compressed-Class-Space.usage":{"value":0.004871957004070282},"jvm.pools.Compressed-Class-Space.used":{"value":5231224},"jvm.pools.Metaspace.committed":{"value":40280064},"jvm.pools.Metaspace.init":{"value":0},"jvm.pools.Metaspace.max":{"value":-1},"jvm.pools.Metaspace.usage":{"value":0.9868748967231035},"jvm.pools.Metaspace.used":{"value":39751384},"jvm.pools.PS-Eden-Space.committed":{"value":178782208},"jvm.pools.PS-Eden-Space.init":{"value":32505856},"jvm.pools.PS-Eden-Space.max":{"value":322961408},"jvm.pools.PS-Eden-Space.usage":{"value":0.23944089319798853},"jvm.pools.PS-Eden-Space.used":{"value":77330168},"jvm.pools.PS-Old-Gen.committed":{"value":90177536},"jvm.pools.PS-Old-Gen.init":{"value":87031808},"jvm.pools.PS-Old-Gen.max":{"value":716177408},"jvm.pools.PS-Old-Gen.usage":{"value":0.03771124821630788},"jvm.pools.PS-Old-Gen.used":{"value":27007944},"jvm.pools.PS-Survivor-Space.committed":{"value":17825792},"jvm.pools.PS-Survivor-Space.init":{"value":5242880},"jvm.pools.PS-Survivor-Space.max":{"value":17825792},"jvm.pools.PS-Survivor-Space.usage":{"value":0.0},"jvm.pools.PS-Survivor-Space.used":{"value":0},"jvm.total.committed":{"value":339099648},"jvm.total.init":{"value":132579328},"jvm.total.max":{"value":954728447},"jvm.total.used":{"value":155447312},"worker.coresFree":{"value":1},"worker.coresUsed":{"value":3},"worker.executors":{"value":0},"worker.memFree_MB":{"value":3774},"worker.memUsed_MB":{"value":3072}},"counters":{"HiveExternalCatalog.fileCacheHits":{"count":0},"HiveExternalCatalog.filesDiscovered":{"count":0},"HiveExternalCatalog.hiveClientCalls":{"count":0},"HiveExternalCatalog.parallelListingJobCount":{"count":0},"HiveExternalCatalog.partitionsFetched":{"count":0}},"histograms":{"CodeGenerator.compilationTime":{"count":0,"max":0,"mean":0.0,"min":0,"p50":0.0,"p75":0.0,"p95":0.0,"p98":0.0,"p99":0.0,"p999":0.0,"stddev":0.0},"CodeGenerator.generatedClassSize":{"count":0,"max":0,"mean":0.0,"min":0,"p50":0.0,"p75":0.0,"p95":0.0,"p98":0.0,"p99":0.0,"p999":0.0,"stddev":0.0},"CodeGenerator.generatedMethodSize":{"count":0,"max":0,"mean":0.0,"min":0,"p50":0.0,"p75":0.0,"p95":0.0,"p98":0.0,"p99":0.0,"p999":0.0,"stddev":0.0},"CodeGenerator.sourceCodeSize":{"count":0,"max":0,"mean":0.0,"min":0,"p50":0.0,"p75":0.0,"p95":0.0,"p98":0.0,"p99":0.0,"p999":0.0,"stddev":0.0}},"meters":{},"timers":{}}

http://192.168.33.123:4040/metrics/json/

{"version":"3.1.3","gauges":{"app-20191230143432-0000.driver.BlockManager.disk.diskSpaceUsed_MB":{"value":0},"app-20191230143432-0000.driver.BlockManager.memory.maxMem_MB":{"value":3290},"app-20191230143432-0000.driver.BlockManager.memory.memUsed_MB":{"value":0},"app-20191230143432-0000.driver.BlockManager.memory.remainingMem_MB":{"value":3289},"app-20191230143432-0000.driver.DAGScheduler.job.activeJobs":{"value":0},"app-20191230143432-0000.driver.DAGScheduler.job.allJobs":{"value":282},"app-20191230143432-0000.driver.DAGScheduler.stage.failedStages":{"value":0},"app-20191230143432-0000.driver.DAGScheduler.stage.runningStages":{"value":0},"app-20191230143432-0000.driver.DAGScheduler.stage.waitingStages":{"value":0},"app-20191230143432-0000.driver.jvm.PS-MarkSweep.count":{"value":4},"app-20191230143432-0000.driver.jvm.PS-MarkSweep.time":{"value":635},"app-20191230143432-0000.driver.jvm.PS-Scavenge.count":{"value":31},"app-20191230143432-0000.driver.jvm.PS-Scavenge.time":{"value":957},"app-20191230143432-0000.driver.jvm.heap.committed":{"value":875036672},"app-20191230143432-0000.driver.jvm.heap.init":{"value":130023424},"app-20191230143432-0000.driver.jvm.heap.max":{"value":2863661056},"app-20191230143432-0000.driver.jvm.heap.usage":{"value":0.11538090072067524},"app-20191230143432-0000.driver.jvm.heap.used":{"value":330411792},"app-20191230143432-0000.driver.jvm.non-heap.committed":{"value":237002752},"app-20191230143432-0000.driver.jvm.non-heap.init":{"value":2555904},"app-20191230143432-0000.driver.jvm.non-heap.max":{"value":-1},"app-20191230143432-0000.driver.jvm.non-heap.usage":{"value":-2.3153268E8},"app-20191230143432-0000.driver.jvm.non-heap.used":{"value":231532680},"app-20191230143432-0000.driver.jvm.pools.Code-Cache.committed":{"value":69009408},"app-20191230143432-0000.driver.jvm.pools.Code-Cache.init":{"value":2555904},"app-20191230143432-0000.driver.jvm.pools.Code-Cache.max":{"value":251658240},"app-20191230143432-0000.driver.jvm.pools.Code-Cache.usage":{"value":0.27235565185546873},"app-20191230143432-0000.driver.jvm.pools.Code-Cache.used":{"value":68540544},"app-20191230143432-0000.driver.jvm.pools.Compressed-Class-Space.committed":{"value":20578304},"app-20191230143432-0000.driver.jvm.pools.Compressed-Class-Space.init":{"value":0},"app-20191230143432-0000.driver.jvm.pools.Compressed-Class-Space.max":{"value":1073741824},"app-20191230143432-0000.driver.jvm.pools.Compressed-Class-Space.usage":{"value":0.018344499170780182},"app-20191230143432-0000.driver.jvm.pools.Compressed-Class-Space.used":{"value":19697256},"app-20191230143432-0000.driver.jvm.pools.Metaspace.committed":{"value":147415040},"app-20191230143432-0000.driver.jvm.pools.Metaspace.init":{"value":0},"app-20191230143432-0000.driver.jvm.pools.Metaspace.max":{"value":-1},"app-20191230143432-0000.driver.jvm.pools.Metaspace.usage":{"value":0.9720506130175048},"app-20191230143432-0000.driver.jvm.pools.Metaspace.used":{"value":143294880},"app-20191230143432-0000.driver.jvm.pools.PS-Eden-Space.committed":{"value":465567744},"app-20191230143432-0000.driver.jvm.pools.PS-Eden-Space.init":{"value":32505856},"app-20191230143432-0000.driver.jvm.pools.PS-Eden-Space.max":{"value":854065152},"app-20191230143432-0000.driver.jvm.pools.PS-Eden-Space.usage":{"value":0.06378632809502571},"app-20191230143432-0000.driver.jvm.pools.PS-Eden-Space.used":{"value":54477680},"app-20191230143432-0000.driver.jvm.pools.PS-Eden-Space.used-after-gc":{"value":0},"app-20191230143432-0000.driver.jvm.pools.PS-Old-Gen.committed":{"value":326631424},"app-20191230143432-0000.driver.jvm.pools.PS-Old-Gen.init":{"value":87031808},"app-20191230143432-0000.driver.jvm.pools.PS-Old-Gen.max":{"value":2147483648},"app-20191230143432-0000.driver.jvm.pools.PS-Old-Gen.usage":{"value":0.08995729312300682},"app-20191230143432-0000.driver.jvm.pools.PS-Old-Gen.used":{"value":193181816},"app-20191230143432-0000.driver.jvm.pools.PS-Old-Gen.used-after-gc":{"value":157891984},"app-20191230143432-0000.driver.jvm.pools.PS-Survivor-Space.committed":{"value":82837504},"app-20191230143432-0000.driver.jvm.pools.PS-Survivor-Space.init":{"value":5242880},"app-20191230143432-0000.driver.jvm.pools.PS-Survivor-Space.max":{"value":82837504},"app-20191230143432-0000.driver.jvm.pools.PS-Survivor-Space.usage":{"value":0.9993685951715783},"app-20191230143432-0000.driver.jvm.pools.PS-Survivor-Space.used":{"value":82785200},"app-20191230143432-0000.driver.jvm.pools.PS-Survivor-Space.used-after-gc":{"value":82785200},"app-20191230143432-0000.driver.jvm.total.committed":{"value":1112039424},"app-20191230143432-0000.driver.jvm.total.init":{"value":132579328},"app-20191230143432-0000.driver.jvm.total.max":{"value":2863661055},"app-20191230143432-0000.driver.jvm.total.used":{"value":561977376},"app-20191230143432-0000.driver.ml.StreamingMetrics.streaming.lastCompletedBatch_processingDelay":{"value":4269},"app-20191230143432-0000.driver.ml.StreamingMetrics.streaming.lastCompletedBatch_processingEndTime":{"value":1577688154289},"app-20191230143432-0000.driver.ml.StreamingMetrics.streaming.lastCompletedBatch_processingStartTime":{"value":1577688150020},"app-20191230143432-0000.driver.ml.StreamingMetrics.streaming.lastCompletedBatch_schedulingDelay":{"value":0},"app-20191230143432-0000.driver.ml.StreamingMetrics.streaming.lastCompletedBatch_submissionTime":{"value":1577688150020},"app-20191230143432-0000.driver.ml.StreamingMetrics.streaming.lastCompletedBatch_totalDelay":{"value":4269},"app-20191230143432-0000.driver.ml.StreamingMetrics.streaming.lastReceivedBatch_processingEndTime":{"value":1577688154289},"app-20191230143432-0000.driver.ml.StreamingMetrics.streaming.lastReceivedBatch_processingStartTime":{"value":1577688150020},"app-20191230143432-0000.driver.ml.StreamingMetrics.streaming.lastReceivedBatch_records":{"value":10},"app-20191230143432-0000.driver.ml.StreamingMetrics.streaming.lastReceivedBatch_submissionTime":{"value":1577688150020},"app-20191230143432-0000.driver.ml.StreamingMetrics.streaming.receivers":{"value":0},"app-20191230143432-0000.driver.ml.StreamingMetrics.streaming.retainedCompletedBatches":{"value":16},"app-20191230143432-0000.driver.ml.StreamingMetrics.streaming.runningBatches":{"value":0},"app-20191230143432-0000.driver.ml.StreamingMetrics.streaming.totalCompletedBatches":{"value":16},"app-20191230143432-0000.driver.ml.StreamingMetrics.streaming.totalProcessedRecords":{"value":157},"app-20191230143432-0000.driver.ml.StreamingMetrics.streaming.totalReceivedRecords":{"value":157},"app-20191230143432-0000.driver.ml.StreamingMetrics.streaming.unprocessedBatches":{"value":0},"app-20191230143432-0000.driver.ml.StreamingMetrics.streaming.waitingBatches":{"value":0}},"counters":{"app-20191230143432-0000.driver.HiveExternalCatalog.fileCacheHits":{"count":0},"app-20191230143432-0000.driver.HiveExternalCatalog.filesDiscovered":{"count":32},"app-20191230143432-0000.driver.HiveExternalCatalog.hiveClientCalls":{"count":0},"app-20191230143432-0000.driver.HiveExternalCatalog.parallelListingJobCount":{"count":0},"app-20191230143432-0000.driver.HiveExternalCatalog.partitionsFetched":{"count":0}},"histograms":{"app-20191230143432-0000.driver.CodeGenerator.compilationTime":{"count":22,"max":438,"mean":48.05451588723314,"min":13,"p50":29.0,"p75":45.0,"p95":98.0,"p98":438.0,"p99":438.0,"p999":438.0,"stddev":77.38735724949991},"app-20191230143432-0000.driver.CodeGenerator.generatedClassSize":{"count":44,"max":11621,"mean":1944.624652973987,"min":532,"p50":1314.0,"p75":2922.0,"p95":4245.0,"p98":11621.0,"p99":11621.0,"p999":11621.0,"stddev":2151.8641213493684},"app-20191230143432-0000.driver.CodeGenerator.generatedMethodSize":{"count":136,"max":3024,"mean":109.08498252029499,"min":1,"p50":15.0,"p75":108.0,"p95":307.0,"p98":790.0,"p99":995.0,"p999":3024.0,"stddev":315.347490214083},"app-20191230143432-0000.driver.CodeGenerator.sourceCodeSize":{"count":22,"max":29335,"mean":4873.428433545155,"min":967,"p50":2463.0,"p75":5656.0,"p95":29335.0,"p98":29335.0,"p99":29335.0,"p999":29335.0,"stddev":6325.7087658508735}},"meters":{},"timers":{"app-20191230143432-0000.driver.DAGScheduler.messageProcessingTime":{"count":5377,"max":39.17997,"mean":0.6072075705660505,"min":9.239999999999999E-4,"p50":0.08406999999999999,"p75":0.170684,"p95":4.179908999999999,"p98":7.990545,"p99":11.113304999999999,"p999":25.713566,"stddev":2.3821932564017647,"m15_rate":5.424543555435747,"m1_rate":11.435735024352416,"m5_rate":9.242628106052958,"mean_rate":11.08907610122998,"duration_units":"milliseconds","rate_units":"calls/second"}}}

对于 Executor 所在节点(8081 端口返回的是该节点 Worker 进程的指标):

http://192.168.34.49:8081/metrics/json/

{"version":"3.0.0","gauges":{"jvm.PS-MarkSweep.count":{"value":1},"jvm.PS-MarkSweep.time":{"value":43},"jvm.PS-Scavenge.count":{"value":6},"jvm.PS-Scavenge.time":{"value":51},"jvm.heap.committed":{"value":135266304},"jvm.heap.init":{"value":62914560},"jvm.heap.max":{"value":954728448},"jvm.heap.usage":{"value":0.09274312940552495},"jvm.heap.used":{"value":88544504},"jvm.non-heap.committed":{"value":41680896},"jvm.non-heap.init":{"value":2555904},"jvm.non-heap.max":{"value":-1},"jvm.non-heap.usage":{"value":-3.9952728E7},"jvm.non-heap.used":{"value":39952728},"jvm.pools.Code-Cache.committed":{"value":6291456},"jvm.pools.Code-Cache.init":{"value":2555904},"jvm.pools.Code-Cache.max":{"value":251658240},"jvm.pools.Code-Cache.usage":{"value":0.021732076009114584},"jvm.pools.Code-Cache.used":{"value":5470080},"jvm.pools.Compressed-Class-Space.committed":{"value":4456448},"jvm.pools.Compressed-Class-Space.init":{"value":0},"jvm.pools.Compressed-Class-Space.max":{"value":1073741824},"jvm.pools.Compressed-Class-Space.usage":{"value":0.00395512580871582},"jvm.pools.Compressed-Class-Space.used":{"value":4246784},"jvm.pools.Metaspace.committed":{"value":30932992},"jvm.pools.Metaspace.init":{"value":0},"jvm.pools.Metaspace.max":{"value":-1},"jvm.pools.Metaspace.usage":{"value":0.9775398383706303},"jvm.pools.Metaspace.used":{"value":30238232},"jvm.pools.PS-Eden-Space.committed":{"value":93323264},"jvm.pools.PS-Eden-Space.init":{"value":15728640},"jvm.pools.PS-Eden-Space.max":{"value":344981504},"jvm.pools.PS-Eden-Space.usage":{"value":0.21925105874661616},"jvm.pools.PS-Eden-Space.used":{"value":75637560},"jvm.pools.PS-Old-Gen.committed":{"value":36700160},"jvm.pools.PS-Old-Gen.init":{"value":41943040},"jvm.pools.PS-Old-Gen.max":{"value":716177408},"jvm.pools.PS-Old-Gen.usage":{"value":0.010739238510020132},"jvm.pools.PS-Old-Gen.used":{"value":7691200},"jvm.pools.PS-Survivor-Space.committed":{"value":5242880},"jvm.pools.PS-Survivor-Space.init":{"value":2621440},"jvm.pools.PS-Survivor-Space.max":{"value":5242880},"jvm.pools.PS-Survivor-Space.usage":{"value":0.99482421875},"jvm.pools.PS-Survivor-Space.used":{"value":5215744},"jvm.total.committed":{"value":176947200},"jvm.total.init":{"value":65470464},"jvm.total.max":{"value":954728447},"jvm.total.used":{"value":128502160},"worker.coresFree":{"value":3},"worker.coresUsed":{"value":1},"worker.executors":{"value":1},"worker.memFree_MB":{"value":1782},"worker.memUsed_MB":{"value":1024}},"counters":{"HiveExternalCatalog.fileCacheHits":{"count":0},"HiveExternalCatalog.filesDiscovered":{"count":0},"HiveExternalCatalog.hiveClientCalls":{"count":0},"HiveExternalCatalog.parallelListingJobCount":{"count":0},"HiveExternalCatalog.partitionsFetched":{"count":0}},"histograms":{"CodeGenerator.compilationTime":{"count":0,"max":0,"mean":0.0,"min":0,"p50":0.0,"p75":0.0,"p95":0.0,"p98":0.0,"p99":0.0,"p999":0.0,"stddev":0.0},"CodeGenerator.generatedClassSize":{"count":0,"max":0,"mean":0.0,"min":0,"p50":0.0,"p75":0.0,"p95":0.0,"p98":0.0,"p99":0.0,"p999":0.0,"stddev":0.0},"CodeGenerator.generatedMethodSize":{"count":0,"max":0,"mean":0.0,"min":0,"p50":0.0,"p75":0.0,"p95":0.0,"p98":0.0,"p99":0.0,"p999":0.0,"stddev":0.0},"CodeGenerator.sourceCodeSize":{"count":0,"max":0,"mean":0.0,"min":0,"p50":0.0,"p75":0.0,"p95":0.0,"p98":0.0,"p99":0.0,"p999":0.0,"stddev":0.0}},"meters":{},"timers":{}}

3. 使用 jconsole 进行 JMX 监控

继续在 spark-defaults.conf 中添加配置,分别暴露 driver 和 executor 的 JMX 端口(注意:以下示例关闭了认证和 SSL,仅适用于受信任的内网环境;若同一节点上运行多个 executor,固定端口 11991 会发生冲突):

spark.driver.extraJavaOptions=-Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.port=11990
spark.executor.extraJavaOptions=-Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.port=11991

在本地终端启动 jconsole,并连接到上面暴露的端口(driver 为 11990,executor 为 11991):

JMX

Search

    Table of Contents