-
Notifications
You must be signed in to change notification settings - Fork 326
Description
When falling back to the VoiceOver text-to-speech engine, we currently pass AVSpeechSynthesizer an AVSpeechUtterance created from the plain text representation of the spoken instruction, in some cases marked up with the IPA pronunciation of the road name. But the spoken instruction’s SSML representation contains more information about prosody, volume, embedded foreign languages, and how to interpret abbreviations and numbers, which we’re dropping on the floor.
iOS 16 introduces AVSpeechUtterance(ssmlRepresentation:) for creating an utterance directly from SSML source code. For iOS 16 and above, we would short-circuit the code that marks up the attributed string with IPA in favor of this initializer. I’m unsure how AVSpeechUtterance handles the proprietary Amazon Polly SSML attributes and tags that appear in the Mapbox Voice API output. We may need to strip them out using either a regular expression (fragile) or XMLDocument (slow). While we’re at it, we could check whether Apple has fixed some of the caveats we identified in #624: that the Alex voice doesn’t support attributed text and that the other voices don’t support the IPA symbols ɡ and ɹ.
// Build the utterance from the attributed form of the instruction for the current leg.
let instructionText = modifiedInstruction.attributedText(for: legProgress)
utterance = AVSpeechUtterance(attributedString: instructionText)
extension NSAttributedString {
    /// Returns an attributed string with the receiver's text in which each space-separated word
    /// is tagged with the corresponding word of an IPA pronunciation.
    ///
    /// Words of the receiver and of `pronunciation` are paired by position. A word that has no
    /// counterpart in `pronunciation` (or whose counterpart is empty) is kept without an IPA
    /// attribute, so the returned string never loses text when the two word counts differ.
    /// Extra phonetic words beyond the receiver's word count are ignored.
    ///
    /// - Parameter pronunciation: The IPA transcription of the receiver's text, with words
    ///   separated by single spaces.
    /// - Returns: A newly built attributed string. Attributes already present on the receiver
    ///   are not carried over; only the IPA notation attribute is set.
    public func pronounced(_ pronunciation: String) -> NSAttributedString {
        let words = string.components(separatedBy: " ")
        let phoneticWords = pronunciation.components(separatedBy: " ")
        let ipaKey = NSAttributedString.Key(rawValue: AVSpeechSynthesisIPANotationAttribute)
        let phoneticString = NSMutableAttributedString()

        for (index, word) in words.enumerated() {
            if phoneticString.length > 0 {
                phoneticString.append(NSAttributedString(string: " "))
            }

            // Iterating with `zip` would stop at the shorter sequence and silently drop the
            // remaining words, so unmatched words are appended as plain text instead.
            guard index < phoneticWords.count, !phoneticWords[index].isEmpty else {
                phoneticString.append(NSAttributedString(string: word))
                continue
            }

            // AVSpeechSynthesizer doesn’t recognize some common IPA symbols.
            let phoneticWord = phoneticWords[index].byReplacing([("ɡ", "g"), ("ɹ", "r")])
            phoneticString.append(NSAttributedString(string: word, attributes: [ipaKey: phoneticWord]))
        }
        return phoneticString
    }
}
extension SpokenInstruction {
    /// Builds an attributed copy of `text` in which the first name of the upcoming step and of
    /// the follow-on step are replaced by IPA-annotated versions of themselves.
    ///
    /// A step contributes nothing when it is absent, lacks a name or a phonetic name, or when
    /// its name does not occur in the text. Only the first occurrence of each name is replaced.
    ///
    /// - Parameter legProgress: The leg progress whose upcoming and follow-on steps supply the
    ///   names and their phonetic spellings.
    /// - Returns: The attributed text to speak.
    func attributedText(for legProgress: RouteLegProgress) -> NSAttributedString {
        let result = NSMutableAttributedString(string: text)

        // Swaps the first occurrence of `name` for its pronounced form, if both values exist.
        func annotate(name: String?, phoneticName: String?) {
            guard let name = name, let phoneticName = phoneticName else {
                return
            }
            let matchRange = result.mutableString.range(of: name)
            guard matchRange.location != NSNotFound else {
                return
            }
            let pronouncedName = NSAttributedString(string: name).pronounced(phoneticName)
            result.replaceCharacters(in: matchRange, with: pronouncedName)
        }

        // The upcoming step is handled before the follow-on step.
        let upcoming = legProgress.upcomingStep
        annotate(name: upcoming?.names?.first, phoneticName: upcoming?.phoneticNames?.first)

        let followOn = legProgress.followOnStep
        annotate(name: followOn?.names?.first, phoneticName: followOn?.phoneticNames?.first)

        return result
    }
}