by
Thomas
The Kodály Method
The App
import Vision
/// Vision request used to detect human hand poses.
///
/// The request is built once by the immediately-invoked closure below and is
/// configured to report no more than two hands.
private let handPoseRequest: VNDetectHumanHandPoseRequest = {
    let poseDetector = VNDetectHumanHandPoseRequest()
    // Cap detection at two hands.
    poseDetector.maximumHandCount = 2
    return poseDetector
}()
The App
CameraViewController: AVCaptureVideoDataOutputSampleBufferDelegate {
func captureOutput(
_ output: AVCaptureOutput,
didOutput sampleBuffer: CMSampleBuffer,
from connection: AVCaptureConnection
) {
var fingerTips: [CGPoint] = []
defer {
DispatchQueue.main.sync {
self.processPoints(fingerTips)
}
}
let handler = VNImageRequestHandler(
cmSampleBuffer: sampleBuffer,
orientation: .up,
options: [:]
)
do {
// Perform VNDetectHumanHandPoseRequest
try handler.perform([handPoseRequest])
// Continue only when at least one hand was detected in the frame. We're interested in a maximum of two hands.
guard
let results = handPoseRequest.results?.prefix(2),
!results.isEmpty
else {
return
}
var recognizedPoints: [VNRecognizedPoint] = []
try results.forEach { observation in
// Get points for all fingers.
let fingers = try observation.recognizedPoints(.all)
// Look for tip points.
if let thumbTipPoint = fingers[.thumbTip] {
recognizedPoints.append(thumbTipPoint)
}
if let indexTipPoint = fingers[.indexTip] {
recognizedPoints.append(indexTipPoint)
}
if let middleTipPoint = fingers[.middleTip] {
recognizedPoints.append(middleTipPoint)
}
if let ringTipPoint = fingers[.ringTip] {
recognizedPoints.append(ringTipPoint)
}
if let littleTipPoint = fingers[.littleTip] {
recognizedPoints.append(littleTipPoint)
}
// Look for Distal points
// if let thumbIpPoint = fingers[.thumbIP] {
// recognizedPoints.append(thumbIpPoint)
// }
// if let indexDipPoint = fingers[.indexDIP] {
// recognizedPoints.append(indexDipPoint)
// }
// if let middleDipPoint = fingers[.middleDIP] {
// recognizedPoints.append(middleDipPoint)
// }
// if let ringDipPoint = fingers[.ringDIP] {
// recognizedPoints.append(ringDipPoint)
// }
// if let littleDipPoint = fingers[.littleDIP] {
// recognizedPoints.append(littleDipPoint)
// }
//
// // Look for PIP joint
// if let thumMpPoint = fingers[.thumbMP] {
// recognizedPoints.append(thumMpPoint)
// }
// if let indexPipPoint = fingers[.indexPIP] {
// recognizedPoints.append(indexPipPoint)
// }
// if let middlePipPoint = fingers[.middlePIP] {
// recognizedPoints.append(middlePipPoint)
// }
// if let ringPipPoint = fingers[.ringPIP] {
// recognizedPoints.append(ringPipPoint)
// }
// if let littlePipPoint = fingers[.littlePIP] {
// recognizedPoints.append(littlePipPoint)
// }
//
// // look for MCP point
// if let thumbCmcPoint = fingers[.thumbCMC] {
// recognizedPoints.append(thumbCmcPoint)
// }
// if let indexMcpPoint = fingers[.indexMCP] {
// recognizedPoints.append(indexMcpPoint)
// }
// if let middleMcpPoint = fingers[.middleMCP] {
// recognizedPoints.append(middleMcpPoint)
// }
// if let ringMcpPoint = fingers[.ringMCP] {
// recognizedPoints.append(ringMcpPoint)
// }
// if let littleMcpPoint = fingers[.littleMCP] {
// recognizedPoints.append(littleMcpPoint)
// }
//
// // Look for wrist
// if let wristPoint = fingers[.wrist] {
// recognizedPoints.append(wristPoint)
// }
}
fingerTips = recognizedPoints.filter {
// Ignore low confidence points.
$0.confidence > 0.9
}
.map {
// Convert points from Vision coordinates to AVFoundation coordinates.
CGPoint(x: $0.location.x, y: 1 - $0.location.y)
}
} catch {
cameraFeedSession?.stopRunning()
print(error.localizedDescription)
}
guard let handSignsModel = try?VNCoreMLModel(for: kodalyHand_14().model) else { return }
// Create a Core ML Vision request
let request = VNCoreMLRequest(model: handSignsModel) { (finishedRequest, err) in
// Dealing with the result of the Core ML Vision request
guard let results = finishedRequest.results as? [VNClassificationObservation] else { return }
guard let firstResult = results.first else { return }
var predictionString = ""
DispatchQueue.main.async {
switch firstResult.identifier {
case HandSign.number1.rawValue:
predictionString = "Do"
case HandSign.number2.rawValue:
predictionString = "Re"
case HandSign.number3.rawValue:
predictionString = "Mi"
case HandSign.number4.rawValue:
predictionString = "Fa"
case HandSign.number5.rawValue:
predictionString = "Sol"
case HandSign.number6.rawValue:
predictionString = "La"
case HandSign.number7.rawValue:
predictionString = "Ti"
case HandSign.noHand.rawValue:
predictionString = "No hand"
default:
break
}
print(predictionString)
}
}
try? handler.perform([request])
Thank You🎉