coreml-service-yolov8/Sources/App/RecognitionController.swift
2024-04-18 14:30:03 +02:00

96 lines
3.2 KiB
Swift

import Vapor
import AVFoundation
import CoreImage
import Vision
/// Vapor route collection that exposes object detection over HTTP.
///
/// `loadModel()` has to succeed before requests are served; until then
/// `POST /recognize` fails with `ModelError.notLoaded`.
class RecognitionController: RouteCollection {
    /// Failures raised by `loadModel()` when the compiled model is unusable.
    enum ModelLoadError: Error {
        /// The model description does not expose its class labels as strings.
        case missingClassLabels
    }

    /// Maximum size of a collected request body, in bytes (2000 KiB).
    private static let maxBodyBytes = 2000 * 1024

    /// Directory that contains the compiled `.mlmodelc` bundle.
    private var modelsPath: String

    /// Rendering context shared by all requests. Building a `CIContext` is
    /// comparatively expensive and Core Image documents contexts as safe to
    /// share between threads, so it is created once instead of per request.
    private let ciContext = CIContext()

    init(modelsPath: String) {
        self.modelsPath = modelsPath
    }

    /// Class labels reported by the loaded model; empty until `loadModel()` succeeds.
    var classes: [String] = []

    /// Template Vision request wrapping the loaded model; `nil` until `loadModel()` succeeds.
    var yoloRequest: VNCoreMLRequest?

    /// Loads `yolov8m-oiv7.mlmodelc` from `modelsPath` and prepares it for Vision.
    ///
    /// State is only committed once every step has succeeded, so a failed
    /// load never leaves `classes` populated without a matching `yoloRequest`.
    ///
    /// - Throws: `ModelLoadError.missingClassLabels` when the model has no
    ///   string class labels, or whatever `MLModel` / `VNCoreMLModel` throw.
    func loadModel() throws {
        let modelURL = URL(fileURLWithPath: modelsPath).appendingPathComponent("yolov8m-oiv7.mlmodelc")
        let model = try MLModel(contentsOf: modelURL, configuration: MLModelConfiguration())
        guard let labels = model.modelDescription.classLabels as? [String] else {
            // A misconfigured model is a recoverable startup error, not a reason to abort the process.
            throw ModelLoadError.missingClassLabels
        }
        let vnModel = try VNCoreMLModel(for: model)

        classes = labels
        yoloRequest = VNCoreMLRequest(model: vnModel)
    }

    /// Registers `POST /recognize`, collecting bodies up to `maxBodyBytes`.
    func boot(routes: RoutesBuilder) throws {
        routes.on(
            .POST,
            "recognize",
            body: .collect(maxSize: ByteCount(value: RecognitionController.maxBodyBytes)),
            use: recognize
        )
    }

    /// Runs object detection on a base64-encoded image sent as the request content.
    ///
    /// - Parameter req: Request whose content decodes to a base64 string of image data.
    /// - Returns: The detections in pixel coordinates of the decoded image. The
    ///   list is empty when the payload is not valid base64, cannot be decoded
    ///   as an image, has no usable size, or Vision yields no object observations.
    /// - Throws: `ModelError.notLoaded` if no model has been loaded, an `Abort`
    ///   with status 500 if the pixel buffer cannot be allocated, and any error
    ///   thrown while decoding the content or performing the Vision request.
    func recognize(req: Request) async throws -> BboxResponse {
        guard let templateRequest = yoloRequest else {
            throw ModelError.notLoaded
        }

        let payload = try req.content.decode(String.self)
        // Undecodable input is reported as "no detections", matching the
        // existing handling of malformed base64, instead of crashing the server.
        guard let imageData = Data(base64Encoded: payload, options: .ignoreUnknownCharacters),
              let ciImage = CIImage(data: imageData) else {
            return BboxResponse(detections: [])
        }

        let extent = ciImage.extent
        // Converting an infinite extent to Int would trap; an empty one cannot be rendered.
        guard !extent.isInfinite, !extent.isEmpty else {
            return BboxResponse(detections: [])
        }
        let width = Int(extent.width)
        let height = Int(extent.height)
        guard width > 0, height > 0 else {
            return BboxResponse(detections: [])
        }

        let attrs = [kCVPixelBufferCGImageCompatibilityKey: kCFBooleanTrue,
                     kCVPixelBufferCGBitmapContextCompatibilityKey: kCFBooleanTrue] as CFDictionary
        var pixelBuffer: CVPixelBuffer?
        let status = CVPixelBufferCreate(kCFAllocatorDefault,
                                         width,
                                         height,
                                         kCVPixelFormatType_32BGRA,
                                         attrs,
                                         &pixelBuffer)
        guard status == kCVReturnSuccess, let buffer = pixelBuffer else {
            throw Abort(.internalServerError, reason: "Could not allocate a pixel buffer for the image")
        }
        ciContext.render(ciImage, to: buffer)

        // Each call performs its own request object: reading `results` from a
        // request shared between overlapping HTTP requests could return the
        // observations that belong to a different image.
        let detectionRequest = VNCoreMLRequest(model: templateRequest.model)
        detectionRequest.imageCropAndScaleOption = templateRequest.imageCropAndScaleOption

        let handler = VNImageRequestHandler(cvPixelBuffer: buffer)
        try handler.perform([detectionRequest])
        guard let observations = detectionRequest.results as? [VNRecognizedObjectObservation] else {
            return BboxResponse(detections: [])
        }

        let detections: [Detection] = observations.compactMap { observation in
            // An observation without a label is skipped on its own; it must
            // not discard the detections that do carry one.
            guard let label = observation.labels.first?.identifier else {
                return nil
            }
            let box = observation.boundingBox
            // The bounding box is normalized; scale it to pixels and flip the
            // vertical axis so `y` is the distance from the top edge.
            return Detection(
                prob: observation.confidence,
                category: label,
                x: Float(box.minX * extent.width),
                y: Float((1 - box.maxY) * extent.height),
                w: Float(box.width * extent.width),
                h: Float(box.height * extent.height)
            )
        }
        return BboxResponse(detections: detections)
    }
}
/// Response body returned by the `recognize` route.
struct BboxResponse: Content {
    /// Detected objects; an empty array when nothing was recognized.
    let detections: [Detection]
}
/// A single detected object and its bounding box.
///
/// `RecognitionController.recognize` fills the box by scaling the Vision
/// bounding box with the decoded image's width and height.
struct Detection: Codable {
    /// Confidence reported for the observation.
    let prob: Float
    /// Identifier of the observation's first label.
    let category: String?
    /// Left edge of the box (`minX` scaled by the image width).
    let x: Float
    /// Vertical offset of the box (`1 - maxY`, scaled by the image height).
    let y: Float
    /// Box width, scaled by the image width.
    let w: Float
    /// Box height, scaled by the image height.
    let h: Float
}
/// Errors describing the state of the detection model.
public enum ModelError: Error {
    /// A request arrived before a model had been loaded.
    case notLoaded
}