Kodaly EarTrainer

by

Thomas

The Kodály Method

  • Zoltán Kodály
  • Movement/gestures
  • Logic behind the signs

The App

import Vision

/// Hand-pose detection request, created once and stored in a file-private constant.
///
/// The request is configured to report at most two hands.
private let handPoseRequest: VNDetectHumanHandPoseRequest = {
  let poseRequest = VNDetectHumanHandPoseRequest()
  // Cap detection at two hands.
  poseRequest.maximumHandCount = 2
  return poseRequest
}()

The App

// Sample-buffer delegate conformance: for each delivered frame, runs the hand-pose
// request and, unless the method returned early, a Core ML hand-sign classification.
// NOTE(review): no `extension`/`class` keyword precedes the type name in this excerpt —
// confirm against the project source.
CameraViewController: AVCaptureVideoDataOutputSampleBufferDelegate {
    /// Handles one captured sample buffer.
    ///
    /// Steps, in order:
    /// 1. Performs `handPoseRequest` on the frame and keeps at most two observations.
    /// 2. Collects the five fingertip points of each observation, drops those with
    ///    confidence of 0.9 or lower, and flips their y coordinate.
    /// 3. Builds a `VNCoreMLModel` from `kodalyHand_14`, performs a `VNCoreMLRequest`
    ///    on the same frame and prints a label ("Do" … "Ti" or "No hand").
    ///
    /// On every exit path `processPoints(_:)` is called on the main queue with the
    /// fingertip points gathered so far (an empty array if none were gathered).
    ///
    /// - Parameters:
    ///   - output: The capture output that produced the frame; not read in this method.
    ///   - sampleBuffer: The frame passed to the Vision request handler.
    ///   - connection: The connection the frame arrived on; not read in this method.
    func captureOutput(
        _ output: AVCaptureOutput,
        didOutput sampleBuffer: CMSampleBuffer,
        from connection: AVCaptureConnection
    ) {
        // Filled in at the end of the `do` block; stays empty on early return or error.
        var fingerTips: [CGPoint] = []
        
        // Runs when the method exits, so it sees the final value of `fingerTips`.
        // NOTE(review): `main.sync` blocks the calling queue until the main queue has run
        // the closure; this would deadlock if the delegate were ever invoked on the main
        // queue — confirm which queue the sample buffer delegate is registered on.
        defer {
            DispatchQueue.main.sync {
                self.processPoints(fingerTips)
            }
        }
        
        // One handler per frame; it is reused below for the Core ML request.
        let handler = VNImageRequestHandler(
            cmSampleBuffer: sampleBuffer,
            orientation: .up,
            options: [:]
        )
        do {
            // Perform VNDetectHumanHandPoseRequest
            try handler.perform([handPoseRequest])
            
            // Continue only when at least a hand was detected in the frame. We're interested in maximum of two hands.
            // Returning here also skips the Core ML classification further down.
            guard
                let results = handPoseRequest.results?.prefix(2),
                !results.isEmpty
            else {
                return
            }
            
            // Tip points of all observations, in thumb → little finger order per hand.
            var recognizedPoints: [VNRecognizedPoint] = []
            
            try results.forEach { observation in
                // Get points for all fingers.
                let fingers = try observation.recognizedPoints(.all)
                
                // Look for tip points.
                if let thumbTipPoint = fingers[.thumbTip] {
                    recognizedPoints.append(thumbTipPoint)
                }
                if let indexTipPoint = fingers[.indexTip] {
                    recognizedPoints.append(indexTipPoint)
                }
                if let middleTipPoint = fingers[.middleTip] {
                    recognizedPoints.append(middleTipPoint)
                }
                if let ringTipPoint = fingers[.ringTip] {
                    recognizedPoints.append(ringTipPoint)
                }
                if let littleTipPoint = fingers[.littleTip] {
                    recognizedPoints.append(littleTipPoint)
                }
                // The remaining joints (distal, PIP, MCP, wrist) are currently disabled;
                // only the five tip points above are collected.
                                 // Look for Distal points
//                if let thumbIpPoint = fingers[.thumbIP] {
//                    recognizedPoints.append(thumbIpPoint)
//                }
//                if let indexDipPoint = fingers[.indexDIP] {
//                    recognizedPoints.append(indexDipPoint)
//                }
//                if let middleDipPoint = fingers[.middleDIP] {
//                    recognizedPoints.append(middleDipPoint)
//                }
//                if let ringDipPoint = fingers[.ringDIP] {
//                    recognizedPoints.append(ringDipPoint)
//                }
//                if let littleDipPoint = fingers[.littleDIP] {
//                    recognizedPoints.append(littleDipPoint)
//                }
//
//                // Look for PIP joint
//                if let thumMpPoint = fingers[.thumbMP] {
//                    recognizedPoints.append(thumMpPoint)
//                }
//                if let indexPipPoint = fingers[.indexPIP] {
//                    recognizedPoints.append(indexPipPoint)
//                }
//                if let middlePipPoint = fingers[.middlePIP] {
//                    recognizedPoints.append(middlePipPoint)
//                }
//                if let ringPipPoint = fingers[.ringPIP] {
//                    recognizedPoints.append(ringPipPoint)
//                }
//                if let littlePipPoint = fingers[.littlePIP] {
//                    recognizedPoints.append(littlePipPoint)
//                }
//
//                // look for MCP point
//                if let thumbCmcPoint = fingers[.thumbCMC] {
//                    recognizedPoints.append(thumbCmcPoint)
//                }
//                if let indexMcpPoint = fingers[.indexMCP] {
//                    recognizedPoints.append(indexMcpPoint)
//                }
//                if let middleMcpPoint = fingers[.middleMCP] {
//                    recognizedPoints.append(middleMcpPoint)
//                }
//                if let ringMcpPoint = fingers[.ringMCP] {
//                    recognizedPoints.append(ringMcpPoint)
//                }
//                if let littleMcpPoint = fingers[.littleMCP] {
//                    recognizedPoints.append(littleMcpPoint)
//                }
//
//                // Look for wrist
//                if let wristPoint = fingers[.wrist] {
//                    recognizedPoints.append(wristPoint)
//                }
            }
            
            fingerTips = recognizedPoints.filter {
                // Ignore low confidence points.
                $0.confidence > 0.9
            }
            .map {
                // Convert points from Vision coordinates to AVFoundation coordinates.
                // Only the y axis is flipped; x is passed through unchanged.
                CGPoint(x: $0.location.x, y: 1 - $0.location.y)
            }
        } catch {
            // Stop the capture session (if one is set) and log the failure.
            // NOTE(review): there is no `return` here, so the Core ML classification
            // below still runs for this frame after the session was stopped — confirm intended.
            cameraFeedSession?.stopRunning()
            print(error.localizedDescription)
        }
        // Wrap the `kodalyHand_14` model for Vision; bail out if wrapping fails.
        // NOTE(review): this constructs the model on every call of this method, i.e. once
        // per delivered frame — consider creating it once and reusing it.
guard let handSignsModel = try?VNCoreMLModel(for: kodalyHand_14().model) else { return }
        // Create a Core ML Vision request
        let request = VNCoreMLRequest(model: handSignsModel) { (finishedRequest, err) in
            
            // Dealing with the result of the Core ML Vision request
            // `err` is not inspected; a missing or mistyped result simply ends the callback.
            guard let results = finishedRequest.results as? [VNClassificationObservation] else { return }
            // Presumably the highest-confidence classification — TODO confirm result ordering.
            guard let firstResult = results.first else { return }
            var predictionString = ""
            // Map the class identifier to its printed label on the main queue.
            DispatchQueue.main.async {
                switch firstResult.identifier {
                case HandSign.number1.rawValue:
                    predictionString = "Do"
                case HandSign.number2.rawValue:
                    predictionString = "Re"
                case HandSign.number3.rawValue:
                    predictionString = "Mi"
                case HandSign.number4.rawValue:
                    predictionString = "Fa"
                case HandSign.number5.rawValue:
                    predictionString = "Sol"
                case HandSign.number6.rawValue:
                    predictionString = "La"
                case HandSign.number7.rawValue:
                    predictionString = "Ti"
                case HandSign.noHand.rawValue:
                    predictionString = "No hand"
                default:
                    // Unknown identifier: `predictionString` stays empty, so an empty line is printed.
                    break
                }
                print(predictionString)
            }
        }
        // Classify the same frame with the handler created above; `try?` discards any error.
        try? handler.perform([request])
        // NOTE(review): the closing braces of this method and of the enclosing declaration
        // are not part of this excerpt.

Thank You🎉

deck

By thomanda