(适用于Hadoop 2.7及以上版本)
涉及到的RESTful API:
- ResourceManager REST API’s:
https://hadoop.apache.org/docs/stable/hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html
- WebHDFS REST API:
https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/WebHDFS.html
- MapReduce History Server REST API’s:
https://hadoop.apache.org/docs/stable/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/HistoryServerRest.html
- Spark Monitoring and Instrumentation:
http://spark.apache.org/docs/latest/monitoring.html
1. 统计HDFS文件系统实时使用情况
- URL
http://emr-header-1:50070/webhdfs/v1/<PATH>?user.name=hadoop&op=GETCONTENTSUMMARY
- 返回结果
{
"ContentSummary":
{
"directoryCount": 2,
"fileCount" : 1,
"length" : 24930,
"quota" : -1,
"spaceConsumed" : 24930,
"spaceQuota" : -1
}
}
- 关于返回结果的说明:
{
"name" : "ContentSummary",
"properties":
{
"ContentSummary":
{
"type" : "object",
"properties":
{
"directoryCount":
{
"description": "The number of directories.",
"type" : "integer",
"required" : true
},
"fileCount":
{
"description": "The number of files.",
"type" : "integer",
"required" : true
},
"length":
{
"description": "The number of bytes used by the content.",
"type" : "integer",
"required" : true
},
"quota":
{
"description": "The namespace quota of this directory.",
"type" : "integer",
"required" : true
},
"spaceConsumed":
{
"description": "The disk space consumed by the content.",
"type" : "integer",
"required" : true
},
"spaceQuota":
{
"description": "The disk space quota.",
"type" : "integer",
"required" : true
}
}
}
}
}
- 注意length与spaceConsumed的关系:length是文件内容的逻辑大小(字节),spaceConsumed是包含所有副本在内实际占用的磁盘空间,因此spaceConsumed约等于length × HDFS副本数。
- 如果要统计各个组工作目录的使用情况,使用如下请求:
http://emr-header-1:50070/webhdfs/v1/user/feed_aliyun?user.name=hadoop&op=GETCONTENTSUMMARY
2. 查看集群的实时信息和状态
- URL
http://emr-header-1:8088/ws/v1/cluster
- 返回结果
{
"clusterInfo": {
"id": 1495123166259,
"startedOn": 1495123166259,
"state": "STARTED",
"haState": "ACTIVE",
"rmStateStoreName": "org.apache.hadoop.yarn.server.resourcemanager.recovery.NullRMStateStore",
"resourceManagerVersion": "2.7.2",
"resourceManagerBuildVersion": "2.7.2 from 4bee04d3d1c27d7ef559365d3bdd2a8620807bfc by root source checksum c63f7cc71b8f63249e35126f0f7492d",
"resourceManagerVersionBuiltOn": "2017-04-17T12:28Z",
"hadoopVersion": "2.7.2",
"hadoopBuildVersion": "2.7.2 from 4bee04d3d1c27d7ef559365d3bdd2a8620807bfc by root source checksum 3329b146070a2bc9e249fa9ba9fb55",
"hadoopVersionBuiltOn": "2017-04-17T12:18Z",
"haZooKeeperConnectionState": "ResourceManager HA is not enabled."
}
}
3. 查看资源队列的实时信息,包括队列的配额信息、资源使用实时情况
- URL
http://emr-header-1:8088/ws/v1/cluster/scheduler
- 返回结果
{
"scheduler": {
"schedulerInfo": {
"type": "capacityScheduler",
"capacity": 100,
"usedCapacity": 0,
"maxCapacity": 100,
"queueName": "root",
"queues": {
"queue": [
{
"type": "capacitySchedulerLeafQueueInfo",
"capacity": 1,
"usedCapacity": 0,
"maxCapacity": 90,
"absoluteCapacity": 1,
"absoluteMaxCapacity": 90,
"absoluteUsedCapacity": 0,
"numApplications": 0,
"queueName": "algorithm_aliyun",
"state": "RUNNING",
"resourcesUsed": {
"memory": 0,
"vCores": 0
},
"hideReservationQueues": false,
"nodeLabels": [
"*"
],
"numActiveApplications": 0,
"numPendingApplications": 0,
"numContainers": 0,
"maxApplications": 100,
"maxApplicationsPerUser": 100,
"userLimit": 100,
"users": null,
"userLimitFactor": 1,
"AMResourceLimit": {
"memory": 11776,
"vCores": 7
},
"usedAMResource": {
"memory": 0,
"vCores": 0
},
"userAMResourceLimit": {
"memory": 160,
"vCores": 1
},
"preemptionDisabled": true
},
{
"type": "capacitySchedulerLeafQueueInfo",
"capacity": 1,
"usedCapacity": 0,
"maxCapacity": 90,
"absoluteCapacity": 1,
"absoluteMaxCapacity": 90,
"absoluteUsedCapacity": 0,
"numApplications": 0,
"queueName": "dcps_aliyun",
"state": "RUNNING",
"resourcesUsed": {
"memory": 0,
"vCores": 0
},
"hideReservationQueues": false,
"nodeLabels": [
"*"
],
"numActiveApplications": 0,
"numPendingApplications": 0,
"numContainers": 0,
"maxApplications": 100,
"maxApplicationsPerUser": 100,
"userLimit": 100,
"users": null,
"userLimitFactor": 1,
"AMResourceLimit": {
"memory": 11776,
"vCores": 7
},
"usedAMResource": {
"memory": 0,
"vCores": 0
},
"userAMResourceLimit": {
"memory": 160,
"vCores": 1
},
"preemptionDisabled": true
},
{
"type": "capacitySchedulerLeafQueueInfo",
"capacity": 31,
"usedCapacity": 0,
"maxCapacity": 100,
"absoluteCapacity": 31,
"absoluteMaxCapacity": 100,
"absoluteUsedCapacity": 0,
"numApplications": 0,
"queueName": "default",
"state": "RUNNING",
"resourcesUsed": {
"memory": 0,
"vCores": 0
},
"hideReservationQueues": false,
"nodeLabels": [
"*"
],
"numActiveApplications": 0,
"numPendingApplications": 0,
"numContainers": 0,
"maxApplications": 3100,
"maxApplicationsPerUser": 3100,
"userLimit": 100,
"users": null,
"userLimitFactor": 1,
"AMResourceLimit": {
"memory": 13088,
"vCores": 8
},
"usedAMResource": {
"memory": 0,
"vCores": 0
},
"userAMResourceLimit": {
"memory": 4064,
"vCores": 3
},
"preemptionDisabled": true
},
{
"type": "capacitySchedulerLeafQueueInfo",
"capacity": 15.000001,
"usedCapacity": 0,
"maxCapacity": 100,
"absoluteCapacity": 15.000001,
"absoluteMaxCapacity": 100,
"absoluteUsedCapacity": 0,
"numApplications": 0,
"queueName": "feed_aliyun",
"state": "RUNNING",
"resourcesUsed": {
"memory": 0,
"vCores": 0
},
"hideReservationQueues": false,
"nodeLabels": [
"*"
],
"numActiveApplications": 0,
"numPendingApplications": 0,
"numContainers": 0,
"maxApplications": 1500,
"maxApplicationsPerUser": 7500,
"userLimit": 100,
"users": null,
"userLimitFactor": 5,
"AMResourceLimit": {
"memory": 12320,
"vCores": 8
},
"usedAMResource": {
"memory": 0,
"vCores": 0
},
"userAMResourceLimit": {
"memory": 9856,
"vCores": 7
},
"preemptionDisabled": true
},
{
"type": "capacitySchedulerLeafQueueInfo",
"capacity": 51,
"usedCapacity": 0,
"maxCapacity": 90,
"absoluteCapacity": 51,
"absoluteMaxCapacity": 90,
"absoluteUsedCapacity": 0,
"numApplications": 0,
"queueName": "hot_aliyun",
"state": "RUNNING",
"resourcesUsed": {
"memory": 0,
"vCores": 0
},
"hideReservationQueues": false,
"nodeLabels": [
"*"
],
"numActiveApplications": 0,
"numPendingApplications": 0,
"numContainers": 0,
"maxApplications": 5100,
"maxApplicationsPerUser": 5100,
"userLimit": 100,
"users": null,
"userLimitFactor": 1,
"AMResourceLimit": {
"memory": 11776,
"vCores": 7
},
"usedAMResource": {
"memory": 0,
"vCores": 0
},
"userAMResourceLimit": {
"memory": 6688,
"vCores": 5
},
"preemptionDisabled": true
},
{
"type": "capacitySchedulerLeafQueueInfo",
"capacity": 1,
"usedCapacity": 0,
"maxCapacity": 90,
"absoluteCapacity": 1,
"absoluteMaxCapacity": 90,
"absoluteUsedCapacity": 0,
"numApplications": 0,
"queueName": "push_aliyun",
"state": "RUNNING",
"resourcesUsed": {
"memory": 0,
"vCores": 0
},
"hideReservationQueues": false,
"nodeLabels": [
"*"
],
"numActiveApplications": 0,
"numPendingApplications": 0,
"numContainers": 0,
"maxApplications": 100,
"maxApplicationsPerUser": 100,
"userLimit": 100,
"users": null,
"userLimitFactor": 1,
"AMResourceLimit": {
"memory": 11776,
"vCores": 7
},
"usedAMResource": {
"memory": 0,
"vCores": 0
},
"userAMResourceLimit": {
"memory": 160,
"vCores": 1
},
"preemptionDisabled": true
}
]
}
}
}
}
4. 查看实时的作业列表,列表信息中也包含了作业运行的详情信息,包括作业名称、id、运行状态、起止时间,资源使用情况。
- URL
http://emr-header-1:8088/ws/v1/cluster/apps
- 返回结果
{
"apps":
{
"app":
[
{
"finishedTime" : 1326815598530,
"amContainerLogs" : "http://host.domain.com:8042/node/containerlogs/container_1326815542473_0001_01_000001",
"trackingUI" : "History",
"state" : "FINISHED",
"user" : "user1",
"id" : "application_1326815542473_0001",
"clusterId" : 1326815542473,
"finalStatus" : "SUCCEEDED",
"amHostHttpAddress" : "host.domain.com:8042",
"progress" : 100,
"name" : "word count",
"startedTime" : 1326815573334,
"elapsedTime" : 25196,
"diagnostics" : "",
"trackingUrl" : "http://host.domain.com:8088/proxy/application_1326815542473_0001/jobhistory/job/job_1326815542473_1_1",
"queue" : "default",
"allocatedMB" : 0,
"allocatedVCores" : 0,
"runningContainers" : 0,
"memorySeconds" : 151730,
"vcoreSeconds" : 103
},
{
"finishedTime" : 1326815789546,
"amContainerLogs" : "http://host.domain.com:8042/node/containerlogs/container_1326815542473_0002_01_000001",
"trackingUI" : "History",
"state" : "FINISHED",
"user" : "user1",
"id" : "application_1326815542473_0002",
"clusterId" : 1326815542473,
"finalStatus" : "SUCCEEDED",
"amHostHttpAddress" : "host.domain.com:8042",
"progress" : 100,
"name" : "Sleep job",
"startedTime" : 1326815641380,
"elapsedTime" : 148166,
"diagnostics" : "",
"trackingUrl" : "http://host.domain.com:8088/proxy/application_1326815542473_0002/jobhistory/job/job_1326815542473_2_2",
"queue" : "default",
"allocatedMB" : 0,
"allocatedVCores" : 0,
"runningContainers" : 1,
"memorySeconds" : 640064,
"vcoreSeconds" : 442
}
]
}
}
- 如果要统计固定时间段内结束的作业,可以加上"?finishedTimeBegin={时间戳}&finishedTimeEnd={时间戳}"参数(时间戳为毫秒级Unix时间戳),例如
http://emr-header-1:8088/ws/v1/cluster/apps?finishedTimeBegin=1496742124000&finishedTimeEnd=1496742134000
5. 统计作业扫描的数据量情况
job扫描的数据量,需要通过History Server的RESTful API查询,MapReduce的和Spark的又有一些差异。
5.1 MapReduce job扫描数据量
- URL
http://emr-header-1:19888/ws/v1/history/mapreduce/jobs/job_1495123166259_0962/counters
- 返回结果
{
"jobCounters" : {
"id" : "job_1326381300833_2_2",
"counterGroup" : [
{
"counterGroupName" : "Shuffle Errors",
"counter" : [
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "BAD_ID"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "CONNECTION"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "IO_ERROR"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "WRONG_LENGTH"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "WRONG_MAP"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "WRONG_REDUCE"
}
]
},
{
"counterGroupName" : "org.apache.hadoop.mapreduce.FileSystemCounter",
"counter" : [
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 2483,
"name" : "FILE_BYTES_READ"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 108525,
"name" : "FILE_BYTES_WRITTEN"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "FILE_READ_OPS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "FILE_LARGE_READ_OPS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "FILE_WRITE_OPS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 48,
"name" : "HDFS_BYTES_READ"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "HDFS_BYTES_WRITTEN"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 1,
"name" : "HDFS_READ_OPS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "HDFS_LARGE_READ_OPS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "HDFS_WRITE_OPS"
}
]
},
{
"counterGroupName" : "org.apache.hadoop.mapreduce.TaskCounter",
"counter" : [
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 1,
"name" : "MAP_INPUT_RECORDS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 1200,
"name" : "MAP_OUTPUT_RECORDS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 4800,
"name" : "MAP_OUTPUT_BYTES"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 2235,
"name" : "MAP_OUTPUT_MATERIALIZED_BYTES"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 48,
"name" : "SPLIT_RAW_BYTES"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "COMBINE_INPUT_RECORDS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "COMBINE_OUTPUT_RECORDS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 1200,
"name" : "REDUCE_INPUT_GROUPS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 2235,
"name" : "REDUCE_SHUFFLE_BYTES"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 1200,
"name" : "REDUCE_INPUT_RECORDS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "REDUCE_OUTPUT_RECORDS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 2400,
"name" : "SPILLED_RECORDS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 1,
"name" : "SHUFFLED_MAPS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "FAILED_SHUFFLE"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 1,
"name" : "MERGED_MAP_OUTPUTS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 113,
"name" : "GC_TIME_MILLIS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 1830,
"name" : "CPU_MILLISECONDS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 478068736,
"name" : "PHYSICAL_MEMORY_BYTES"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 2159284224,
"name" : "VIRTUAL_MEMORY_BYTES"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 378863616,
"name" : "COMMITTED_HEAP_BYTES"
}
]
},
{
"counterGroupName" : "org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter",
"counter" : [
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "BYTES_READ"
}
]
},
{
"counterGroupName" : "org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter",
"counter" : [
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "BYTES_WRITTEN"
}
]
}
]
}
}
其中org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter里面的BYTES_READ为job扫描的数据量
具体参数:https://hadoop.apache.org/docs/stable/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/HistoryServerRest.html#Job_Counters_API
5.2 Spark job扫描数据量
- URL
http://emr-header-1:18080/api/v1/applications/application_1495123166259_1050/executors
每个executor的totalInputBytes总和为整个job的数据扫描量。
更多参考:http://spark.apache.org/docs/latest/monitoring.html