feat(hos_client_create, hos_client_destory): 多次调用destory不会导致重复释放

This commit is contained in:
彭宣正
2020-12-14 17:24:58 +08:00
parent 505d529c32
commit 10b370e486
55976 changed files with 8544395 additions and 2 deletions

View File

@@ -0,0 +1,126 @@
# Build definition for the aws-cpp-sdk-text-to-speech high-level library.
#
# The script selects one platform audio backend (WaveOut on Windows, PulseAudio on
# Linux, CoreAudio on Apple with a PulseAudio fallback), gathers the matching
# headers/sources and builds a single library target named ${PROJECT_NAME}.
#
# add_project, set_compiler_flags, set_compiler_warnings, setup_install and
# do_packaging, as well as PROJECT_LIBS, PLATFORM_DEP_LIBS and INCLUDE_DIRECTORY,
# are not defined in this file; they are expected to come from the enclosing build.
add_project(aws-cpp-sdk-text-to-speech
    "High-level C++ SDK for Polly"
    aws-cpp-sdk-polly
    aws-cpp-sdk-core)

include(CheckIncludeFiles)

file( GLOB TEXT_TO_SPEECH_HEADERS "include/aws/text-to-speech/*.h" )

# Extra libraries required by whichever audio backend gets selected below.
set(PLATFORM_LIBS "")

# HAVE_PULSE is true when the PulseAudio "simple" API header can be included.
check_include_files("pulse/simple.h" HAVE_PULSE)

if(PLATFORM_WINDOWS)
    file( GLOB TEXT_TO_SPEECH_PLATFORM_HEADERS "include/aws/text-to-speech/windows/*.h" )
    file( GLOB TEXT_TO_SPEECH_PLATFORM_SOURCE "source/text-to-speech/windows/*.cpp" )
    add_definitions("-DWAVE_OUT")
    list(APPEND PLATFORM_LIBS Winmm)
endif()

if (PLATFORM_LINUX)
    if(HAVE_PULSE)
        file( GLOB TEXT_TO_SPEECH_PLATFORM_HEADERS "include/aws/text-to-speech/linux/*.h" )
        file( GLOB TEXT_TO_SPEECH_PLATFORM_SOURCE "source/text-to-speech/linux/*.cpp" )
        message(STATUS "Pulse audio header files have been detected, included pulse audio as a possible sound driver implementation.")
        add_definitions("-DPULSE")
        list(APPEND PLATFORM_LIBS pulse pulse-simple)
    else()
        message(WARNING "We've detected that you are building on linux, but the header files for pulseaudio are not available.\
If you are providing your own audio implementation or you will not be using the text-to-speech library, this is fine.\
However, if you are not providing your own implemenation and you want to use text-to-speech, you need to install the dev files for pulseAudio.\
e.g. apt-get install libpulse-dev or yum install pulseaudio-libs-devel. Currently, no audio implementation will be built into this library.")
    endif()
endif()

if(PLATFORM_APPLE)
    find_library(COREAUDIO_LIBRARY CoreAudio)
    # find_library() stores "COREAUDIO_LIBRARY-NOTFOUND" on failure, which if() treats as
    # false. The previous test compared the value against itself with "-NOTFOUND" appended,
    # which can never match, so the PulseAudio fallback below was unreachable.
    if (NOT COREAUDIO_LIBRARY)
        if (HAVE_PULSE)
            file( GLOB TEXT_TO_SPEECH_PLATFORM_HEADERS "include/aws/text-to-speech/linux/*.h" )
            file( GLOB TEXT_TO_SPEECH_PLATFORM_SOURCE "source/text-to-speech/linux/*.cpp" )
            message(STATUS "Pulse audio header files have been detected, included pulse audio as a possible sound driver implementation.")
            add_definitions("-DPULSE")
            list(APPEND PLATFORM_LIBS pulse pulse-simple)
        else()
            # "FATAL" is not a message() mode, so it used to be printed as part of the text.
            # The wording says a missing backend is acceptable, hence WARNING (as on Linux).
            message(WARNING "We've detected that you are building on macos, but there is no coreAudio library and pulseAudio library available.\
If you are providing your own audio implementation or you will not be using the text-to-speech library, this is fine.\
However, if you are not providing your own implemenation and you want to use text-to-speech, you need to install the dev files for coreAudio or pulseAudio.\
e.g. apt-get install libpulse-dev or yum install pulseaudio-libs-devel. Currently, no audio implementation will be built into this library.")
        endif()
    else()
        file( GLOB TEXT_TO_SPEECH_PLATFORM_HEADERS "include/aws/text-to-speech/apple/*.h" )
        file( GLOB TEXT_TO_SPEECH_PLATFORM_SOURCE "source/text-to-speech/apple/*.cpp" )

        find_path(COREAUDIO_INCLUDE_DIR CoreAudioTypes.h)
        include_directories(${COREAUDIO_INCLUDE_DIR})
        list(APPEND PLATFORM_LIBS ${COREAUDIO_LIBRARY})

        find_library(AUDIOTOOLBOX_LIBRARY AudioToolbox)
        find_path(AUDIOTOOLBOX_INCLUDE_DIR AudioQueue.h)
        include_directories(${AUDIOTOOLBOX_INCLUDE_DIR})
        list(APPEND PLATFORM_LIBS ${AUDIOTOOLBOX_LIBRARY})

        find_library(FOUNDATION_LIBRARY CoreFoundation)
        list(APPEND PLATFORM_LIBS ${FOUNDATION_LIBRARY})

        add_definitions("-DCORE_AUDIO")
        # Warning suppressions passed through to the compiler together with the CoreAudio backend.
        add_definitions("-Wno-gnu-zero-variadic-macro-arguments -Wno-four-char-constants -Wno-nullability-extension")
    endif()
endif()

file( GLOB TEXT_TO_SPEECH_SOURCE "source/text-to-speech/*.cpp" )

# IDE folder layout for Visual Studio generators.
if(MSVC)
    source_group("Header Files\\aws\\text-to-speech" FILES ${TEXT_TO_SPEECH_HEADERS})
    source_group("Header Files\\aws\\text-to-speech\\windows" FILES ${TEXT_TO_SPEECH_PLATFORM_HEADERS})
    source_group("Source Files\\text-to-speech" FILES ${TEXT_TO_SPEECH_SOURCE})
    source_group("Source Files\\text-to-speech\\windows" FILES ${TEXT_TO_SPEECH_PLATFORM_SOURCE})
endif()

# Merge the common and platform-specific file lists.
file(GLOB ALL_TEXT_TO_SPEECH_HEADERS
    ${TEXT_TO_SPEECH_HEADERS}
    ${TEXT_TO_SPEECH_PLATFORM_HEADERS}
)

file(GLOB ALL_TEXT_TO_SPEECH_SOURCE
    ${TEXT_TO_SPEECH_SOURCE}
    ${TEXT_TO_SPEECH_PLATFORM_SOURCE}
)

file(GLOB ALL_TEXT_TO_SPEECH
    ${ALL_TEXT_TO_SPEECH_HEADERS}
    ${ALL_TEXT_TO_SPEECH_SOURCE}
)

set(TEXT_TO_SPEECH_INCLUDES
    "${CMAKE_CURRENT_SOURCE_DIR}/include/"
)

include_directories(${TEXT_TO_SPEECH_INCLUDES})

# Export symbols when building the library itself as a Windows DLL.
if(USE_WINDOWS_DLL_SEMANTICS AND BUILD_SHARED_LIBS)
    add_definitions("-DAWS_TEXT_TO_SPEECH_EXPORTS")
endif()

add_library(${PROJECT_NAME} ${ALL_TEXT_TO_SPEECH})
add_library(AWS::${PROJECT_NAME} ALIAS ${PROJECT_NAME})

set_compiler_flags(${PROJECT_NAME})
set_compiler_warnings(${PROJECT_NAME})

target_include_directories(${PROJECT_NAME} PUBLIC
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
    $<INSTALL_INTERFACE:include>)

target_link_libraries(${PROJECT_NAME} PRIVATE ${PROJECT_LIBS} ${PLATFORM_DEP_LIBS} ${PLATFORM_LIBS})

setup_install()

install (FILES ${ALL_TEXT_TO_SPEECH_HEADERS} DESTINATION ${INCLUDE_DIRECTORY}/aws/text-to-speech)

do_packaging()

View File

@@ -0,0 +1,105 @@
/**
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0.
*/
#pragma once
#include <aws/text-to-speech/TextToSpeech_EXPORTS.h>
#include <aws/core/utils/Array.h>
#include <aws/core/utils/memory/stl/AWSString.h>
#include <aws/core/utils/memory/stl/AWSVector.h>
namespace Aws
{
    namespace TextToSpeech
    {
        // Sample width, channel count and sample rates used to describe PCM formats.
        static const unsigned BIT_WIDTH_16 = 16;
        static const unsigned MONO = 1;
        static const size_t KHZ_22_5 = 22050;
        static const size_t KHZ_16 = 16000;
        static const size_t KHZ_8 = 8000;

        /**
         * One PCM format a device can be configured for.
         * Default-constructs to mono, 16 kHz, 16 bits per sample.
         */
        struct CapabilityInfo
        {
            CapabilityInfo()
                : channels(MONO),
                  sampleRate(KHZ_16),
                  sampleWidthBits(BIT_WIDTH_16)
            {
            }

            unsigned channels;
            size_t sampleRate;
            unsigned sampleWidthBits;
        };

        /**
         * An output device as reported by a driver: its name, its id and the formats it offers.
         */
        struct DeviceInfo
        {
            Aws::String deviceName;
            Aws::String deviceId;
            Aws::Vector<CapabilityInfo> capabilities;
        };

        /**
         * Interface for configuring a sound device and pushing linear PCM data to it.
         */
        class AWS_TEXT_TO_SPEECH_API PCMOutputDriver
        {
        public:
            virtual ~PCMOutputDriver();

            /**
             * Writes a buffer of audio data to the device, in the format chosen through SetActiveDevice().
             * @return true when the audio was written to the device, false otherwise.
             */
            virtual bool WriteBufferToDevice(const unsigned char*, size_t) = 0;

            /**
             * Lists every device together with its capabilities. A device and one of its
             * capabilities are handed back to the driver through SetActiveDevice().
             */
            virtual Aws::Vector<DeviceInfo> EnumerateDevices() const = 0;

            /**
             * Selects the device and format to use. Subsequent WriteBufferToDevice() calls
             * are for that device and format.
             */
            virtual void SetActiveDevice(const DeviceInfo&, const CapabilityInfo&) = 0;

            /**
             * Human-readable name of this driver.
             */
            virtual const char* GetName() const = 0;

            /**
             * Optional hook invoked before each batch of audio data, for platforms that expect
             * a constant data stream and need the device reset first. Does nothing by default.
             */
            virtual void Prime() {}

            /**
             * Optional hook invoked after each batch of audio data, so the device can be paused
             * or reset. Does nothing by default.
             */
            virtual void Flush() {}
        };

        /**
         * Abstract factory handing output drivers to the application. Implement it to
         * override the operating system defaults.
         */
        class AWS_TEXT_TO_SPEECH_API PCMOutputDriverFactory
        {
        public:
            virtual ~PCMOutputDriverFactory() = default;

            /**
             * Returns the drivers the application should support.
             */
            virtual Aws::Vector<std::shared_ptr<PCMOutputDriver>> LoadDrivers() const = 0;
        };

        /**
         * Returns the default factory, which covers the operating system defaults:
         *   Windows - WaveOut
         *   Linux   - PulseAudio
         *   Apple   - CoreAudio
         */
        AWS_TEXT_TO_SPEECH_API std::shared_ptr<PCMOutputDriverFactory> DefaultPCMOutputDriverFactoryInitFn();
    }
}

View File

@@ -0,0 +1,107 @@
/**
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0.
*/
#pragma once
#include <aws/text-to-speech/TextToSpeech_EXPORTS.h>
#include <aws/text-to-speech/PCMOutputDriver.h>
#include <aws/polly/PollyClient.h>
#include <aws/core/client/AsyncCallerContext.h>
#include <atomic>
#include <functional>
#include <memory>
#include <mutex>
#include <utility>
namespace Aws
{
namespace TextToSpeech
{
/**
* Callback for handling notifications that the SendTextToOutputDevice operation has finished.
* @arg1 the text that was sent
* @arg2 the outcome of the operation from the polly service.
* @arg3 whether or not the audio stream was successfully sent to the audio driver.
*/
typedef std::function<void(const char*, const Polly::Model::SynthesizeSpeechOutcome&, bool)> SendTextCompletedHandler;
typedef std::pair<DeviceInfo, std::shared_ptr<PCMOutputDriver>> OutputDevicePair;
typedef Aws::Vector<OutputDevicePair> OutputDeviceList;
/**
* Maximum bytes size of audio to be sent to PCM drivers by TextToSpeechManager in one operation
* with PCMOutputDriver::WriteBufferToDevice()
*/
static const size_t BUFF_SIZE = 8192;
/**
* Manager for rendering text to the Polly service and then sending directly to an audio driver.
* By default this uses our best guess at the correct drivers for you operating system.
* On windows, this is the WaveOut API.
* On Linux, this is PulseAudio
* On Apple, this is CoreAudio.
*
* The drivers used can be arbitrarily overridden to send the stream anywhere you want. Simply provide your own driverFactory implementation.
*/
class AWS_TEXT_TO_SPEECH_API TextToSpeechManager : public std::enable_shared_from_this<TextToSpeechManager>
{
public:
/**
* Create a TextToSpeechManager instance initialized with a polly client and a driver factory.
* If driver factory is nullptr, we will create a default implementation for your operating system.
*/
static std::shared_ptr<TextToSpeechManager> Create(const std::shared_ptr<Polly::PollyClient>& pollyClient,
const std::shared_ptr<PCMOutputDriverFactory>& driverFactory = nullptr);
~TextToSpeechManager();
TextToSpeechManager(const TextToSpeechManager&) = delete;
TextToSpeechManager& operator=(const TextToSpeechManager&) = delete;
TextToSpeechManager(TextToSpeechManager&&) = delete;
TextToSpeechManager& operator=(TextToSpeechManager&&) = delete;
/**
* Sends @text to the Polly Service, once the audio stream is returned, the audio stream is sent to your audio driver.
* @callback will be invoked once the entire operation has finished.
*/
void SendTextToOutputDevice(const char* text, SendTextCompletedHandler callback);
/**
* Enumerate all devices and their capabilities from the installed drivers. On some operating systems,
* the ability to choose devices is limited. On windows, this will be more detailed. Call this function
* to determine what to pass to SetActiveDevice().
*/
OutputDeviceList EnumerateDevices() const;
/**
* Sets the active driver (if there are multiple possbilities), the device to use for that driver, and the
* audio format to configure the device for. This format will also be used for calls to the Polly service.
*/
void SetActiveDevice(const std::shared_ptr<PCMOutputDriver>&, const DeviceInfo&, const CapabilityInfo&);
/**
* Lists all available voices and their language. You can use this function to determine what to pass to the SetActiveVoice()
* function. The first member of the pair is the voice, the second is the language.
*/
Aws::Vector<std::pair<Aws::String, Aws::String>> ListAvailableVoices() const;
/**
* Sets the active voice for use with the Polly Service.
*/
void SetActiveVoice(const Aws::String& voice);
private:
TextToSpeechManager(const std::shared_ptr<Polly::PollyClient>& pollyClient,
const std::shared_ptr<PCMOutputDriverFactory>& driverFactory);
void OnPollySynthSpeechOutcomeRecieved(const Polly::PollyClient*, const Polly::Model::SynthesizeSpeechRequest&,
const Polly::Model::SynthesizeSpeechOutcome&, const std::shared_ptr<const Aws::Client::AsyncCallerContext>&) const;
Polly::PollyClient* m_pollyClient;
std::shared_ptr<PCMOutputDriver> m_activeDriver;
Aws::Vector<std::shared_ptr<PCMOutputDriver>> m_drivers;
std::atomic<Polly::Model::VoiceId> m_activeVoice;
CapabilityInfo m_selectedCaps;
mutable std::mutex m_driverLock;
};
}
}

View File

@@ -0,0 +1,28 @@
/**
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0.
*/
#pragma once
// Defines AWS_TEXT_TO_SPEECH_API, the symbol-visibility macro placed on exported declarations.
#ifdef _MSC_VER
// Silence MSVC warning C4503 (decorated name length exceeded).
#pragma warning (disable : 4503)
#endif
#if defined (USE_WINDOWS_DLL_SEMANTICS) || defined (_WIN32)
#ifdef _MSC_VER
// Silence MSVC warning C4251 (member type needs a dll-interface to be used by clients).
#pragma warning(disable : 4251)
#endif // _MSC_VER
#ifdef USE_IMPORT_EXPORT
// Export when building the library itself, import when consuming it.
#ifdef AWS_TEXT_TO_SPEECH_EXPORTS
#define AWS_TEXT_TO_SPEECH_API __declspec(dllexport)
#else
#define AWS_TEXT_TO_SPEECH_API __declspec(dllimport)
#endif // AWS_TEXT_TO_SPEECH_EXPORTS
#else // USE_IMPORT_EXPORT
// Import/export decoration not requested: the macro expands to nothing.
#define AWS_TEXT_TO_SPEECH_API
#endif // USE_IMPORT_EXPORT
#else // defined (USE_WINDOWS_DLL_SEMANTICS) || defined (_WIN32)
// Non-Windows builds: the macro expands to nothing.
#define AWS_TEXT_TO_SPEECH_API
#endif // defined (USE_WINDOWS_DLL_SEMANTICS) || defined (_WIN32)

View File

@@ -0,0 +1,57 @@
/**
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0.
*/
#pragma once
#include <aws/text-to-speech/PCMOutputDriver.h>
#include <aws/core/utils/memory/stl/AWSQueue.h>
#include <aws/core/utils/Array.h>
#include <AudioQueue.h>
#include <mutex>
#include <condition_variable>
namespace Aws
{
namespace TextToSpeech
{
/**
* Apple implementation for PCM output.
*/
class CoreAudioPCMOutputDriver : public PCMOutputDriver
{
public:
CoreAudioPCMOutputDriver();
virtual ~CoreAudioPCMOutputDriver();
CoreAudioPCMOutputDriver(const CoreAudioPCMOutputDriver&) = delete;
CoreAudioPCMOutputDriver& operator=(const CoreAudioPCMOutputDriver&) = delete;
CoreAudioPCMOutputDriver(CoreAudioPCMOutputDriver&&) = delete;
CoreAudioPCMOutputDriver& operator=(CoreAudioPCMOutputDriver&&) = delete;
bool WriteBufferToDevice(const unsigned char* buffer, size_t bufferSize) override;
Aws::Vector<DeviceInfo> EnumerateDevices() const override;
void SetActiveDevice(const DeviceInfo& device, const CapabilityInfo& capability) override;
const char* GetName() const override;
void Prime() override;
void Flush() override;
private:
void InitDevice();
void CleanUp();
static void OnBufferReady(void *custom_data, AudioQueueRef queue, AudioQueueBufferRef buffer);
AudioStreamBasicDescription m_selectedCaps;
AudioQueueRef m_audioQueue;
Aws::Queue<AudioQueueBufferRef> m_bufferQueue;
size_t m_maxBufferSize;
size_t m_bufferCount;
std::mutex m_queueBufferLock;
std::condition_variable m_queueReadySemaphore;
};
}
}

View File

@@ -0,0 +1,42 @@
/**
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0.
*/
#pragma once
#include <aws/text-to-speech/PCMOutputDriver.h>
#include <pulse/simple.h>
namespace Aws
{
namespace TextToSpeech
{
/**
* PulseAudio implementation for PCM output.
*/
class PulseAudioPCMOutputDriver : public PCMOutputDriver
{
public:
PulseAudioPCMOutputDriver();
virtual ~PulseAudioPCMOutputDriver();
PulseAudioPCMOutputDriver(const PulseAudioPCMOutputDriver&) = delete;
PulseAudioPCMOutputDriver& operator=(const PulseAudioPCMOutputDriver&) = delete;
PulseAudioPCMOutputDriver(PulseAudioPCMOutputDriver&&) = delete;
PulseAudioPCMOutputDriver& operator=(PulseAudioPCMOutputDriver&&) = delete;
bool WriteBufferToDevice(const unsigned char* buffer, size_t bufferSize) override;
Aws::Vector<DeviceInfo> EnumerateDevices() const override;
void SetActiveDevice(const DeviceInfo& device, const CapabilityInfo& capabilities) override;
const char* GetName() const override;
private:
void InitDevice();
DeviceInfo m_activeDevice;
pa_simple* m_driver;
pa_sample_spec m_selectedCaps;
};
}
}

View File

@@ -0,0 +1,48 @@
/**
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0.
*/
#pragma once
#include <aws/text-to-speech/TextToSpeech_EXPORTS.h>
#include <aws/text-to-speech/PCMOutputDriver.h>
#include <mutex>
#include <Windows.h>
namespace Aws
{
    namespace TextToSpeech
    {
        /**
         * PCM output driver for Win32, built on the WaveOut API.
         */
        class AWS_TEXT_TO_SPEECH_API WaveOutPCMOutputDriver : public PCMOutputDriver
        {
        public:
            WaveOutPCMOutputDriver();
            ~WaveOutPCMOutputDriver() override;

            // Neither copyable nor movable.
            WaveOutPCMOutputDriver(const WaveOutPCMOutputDriver&) = delete;
            WaveOutPCMOutputDriver& operator=(const WaveOutPCMOutputDriver&) = delete;
            WaveOutPCMOutputDriver(WaveOutPCMOutputDriver&&) = delete;
            WaveOutPCMOutputDriver& operator=(WaveOutPCMOutputDriver&&) = delete;

            // PCMOutputDriver interface.
            bool WriteBufferToDevice(const unsigned char* buffer, size_t bufferSize) override;
            Aws::Vector<DeviceInfo> EnumerateDevices() const override;
            void SetActiveDevice(const DeviceInfo& device, const CapabilityInfo& caps) override;
            const char* GetName() const override;

        private:
            void InitDevice();

            DeviceInfo m_activeDevice;
            CapabilityInfo m_selectedCaps;
            // WaveOut device handle; null while no device is open.
            HWAVEOUT m_waveOut;
            bool m_isInit;
            std::recursive_mutex m_driverLock;
        };
    }
}

View File

@@ -0,0 +1,50 @@
/**
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0.
*/
#include <aws/text-to-speech/PCMOutputDriver.h>
#include <aws/core/utils/logging/LogMacros.h>
#ifdef WAVE_OUT
#include <aws/text-to-speech/windows/WaveOutPCMOutputDriver.h>
#elif PULSE
#include <aws/text-to-speech/linux/PulseAudioPCMOutputDriver.h>
#elif CORE_AUDIO
#include <aws/text-to-speech/apple/CoreAudioPCMOutputDriver.h>
#endif
namespace Aws
{
    namespace TextToSpeech
    {
        // Out-of-line definition of the destructor declared in the PCMOutputDriver interface.
        PCMOutputDriver::~PCMOutputDriver() {}

        static const char* CLASS_TAG = "DefaultPCMOutputDriverFactory";

        /**
         * Factory returning the single driver selected at compile time through the
         * WAVE_OUT, PULSE or CORE_AUDIO definition. When none is defined the list is empty.
         */
        class DefaultPCMOutputDriverFactory : public PCMOutputDriverFactory
        {
        public:
            Aws::Vector<std::shared_ptr<PCMOutputDriver>> LoadDrivers() const override
            {
                Aws::Vector<std::shared_ptr<PCMOutputDriver>> loaded;

#ifdef WAVE_OUT
                AWS_LOGSTREAM_INFO(CLASS_TAG, "Adding WaveOut Audio Driver.");
                loaded.emplace_back(Aws::MakeShared<WaveOutPCMOutputDriver>(CLASS_TAG));
#elif PULSE
                AWS_LOGSTREAM_INFO(CLASS_TAG, "Adding PulseAudio Audio Driver.");
                loaded.emplace_back(Aws::MakeShared<PulseAudioPCMOutputDriver>(CLASS_TAG));
#elif CORE_AUDIO
                AWS_LOGSTREAM_INFO(CLASS_TAG, "Adding CoreAudio Audio Driver.");
                loaded.emplace_back(Aws::MakeShared<CoreAudioPCMOutputDriver>(CLASS_TAG));
#endif

                return loaded;
            }
        };

        // Creates the factory described above.
        std::shared_ptr<PCMOutputDriverFactory> DefaultPCMOutputDriverFactoryInitFn()
        {
            std::shared_ptr<PCMOutputDriverFactory> factory =
                Aws::MakeShared<DefaultPCMOutputDriverFactory>(CLASS_TAG);
            return factory;
        }
    }
}

View File

@@ -0,0 +1,181 @@
/**
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0.
*/
#include <aws/text-to-speech/TextToSpeechManager.h>
#include <aws/polly/model/SynthesizeSpeechRequest.h>
#include <aws/polly/model/DescribeVoicesRequest.h>
#include <aws/core/utils/Outcome.h>
#include <aws/core/utils/logging/LogMacros.h>
using namespace Aws::Polly;
using namespace Aws::Polly::Model;
using namespace Aws::Utils;
namespace Aws
{
namespace TextToSpeech
{
// Tag passed to the logging macros and to Aws::MakeShared in this file.
static const char* CLASS_TAG = "TextToSpeechManager";
// Caller context attached to the asynchronous SynthesizeSpeech request so the
// completion handler can be recovered when the request finishes.
struct SendTextCompletionHandlerCallbackContext : public Aws::Client::AsyncCallerContext
{
// Handler to notify once the audio has been processed; may be empty.
SendTextCompletedHandler callback;
};
std::shared_ptr<TextToSpeechManager> TextToSpeechManager::Create(const std::shared_ptr<Polly::PollyClient>& pollyClient,
                                                                 const std::shared_ptr<PCMOutputDriverFactory>& driverFactory)
{
    // The constructor is private so that instances always live in a shared_ptr, which means
    // Aws::MakeShared cannot call it directly. This local subclass has a public constructor
    // that forwards to the private one, giving Aws::MakeShared something it can construct.
    struct PubliclyConstructible : public TextToSpeechManager
    {
        PubliclyConstructible(const std::shared_ptr<Polly::PollyClient>& client,
                              const std::shared_ptr<PCMOutputDriverFactory>& factory)
            : TextToSpeechManager(client, factory)
        {
        }
    };

    std::shared_ptr<TextToSpeechManager> manager =
        Aws::MakeShared<PubliclyConstructible>(CLASS_TAG, pollyClient, driverFactory);
    return manager;
}
// Stores the Polly client, defaults the voice to Kimberly and loads the drivers from the
// supplied factory, or from the default factory when none is supplied.
// NOTE(review): only the raw pointer of pollyClient is kept, so the caller presumably has to
// keep the client alive for the lifetime of the manager - confirm.
TextToSpeechManager::TextToSpeechManager(const std::shared_ptr<Polly::PollyClient>& pollyClient,
                                         const std::shared_ptr<PCMOutputDriverFactory>& driverFactory)
    : m_pollyClient(pollyClient.get()),
      m_activeVoice(VoiceId::Kimberly)
{
    const std::shared_ptr<PCMOutputDriverFactory> factory =
        driverFactory ? driverFactory : DefaultPCMOutputDriverFactoryInitFn();
    m_drivers = factory->LoadDrivers();
}

// Nothing to release explicitly; members clean up after themselves.
TextToSpeechManager::~TextToSpeechManager() = default;
void TextToSpeechManager::SendTextToOutputDevice(const char* text, SendTextCompletedHandler handler)
{
if (!m_activeDriver)
{
auto&& devices = EnumerateDevices();
assert(devices.size() > 0);
AWS_LOGSTREAM_INFO(CLASS_TAG, "No device has been configured. Defaulting to the first device available.");
SetActiveDevice(devices.front().second, devices.front().first, devices.front().first.capabilities.front());
}
SynthesizeSpeechRequest synthesizeSpeechRequest;
synthesizeSpeechRequest.WithOutputFormat(OutputFormat::pcm)
.WithSampleRate(StringUtils::to_string(m_selectedCaps.sampleRate))
.WithTextType(TextType::text)
.WithText(text)
.WithVoiceId(m_activeVoice);
auto context = Aws::MakeShared<SendTextCompletionHandlerCallbackContext>(CLASS_TAG);
context->callback = handler;
auto self = shared_from_this();
m_pollyClient->SynthesizeSpeechAsync(synthesizeSpeechRequest, [self](const Polly::PollyClient* client, const Polly::Model::SynthesizeSpeechRequest& request,
const Polly::Model::SynthesizeSpeechOutcome& speechOutcome, const std::shared_ptr<const Aws::Client::AsyncCallerContext>& context)
{self -> OnPollySynthSpeechOutcomeRecieved(client, request, speechOutcome, context);}, context);
}
// Collects every device reported by every loaded driver, each paired with the driver
// that reported it. The driver lock is taken separately for each driver.
OutputDeviceList TextToSpeechManager::EnumerateDevices() const
{
    OutputDeviceList collected;

    for (auto& outputDriver : m_drivers)
    {
        std::lock_guard<std::mutex> guard(m_driverLock);

        const auto reportedDevices = outputDriver->EnumerateDevices();
        for (const auto& info : reportedDevices)
        {
            AWS_LOGSTREAM_DEBUG(CLASS_TAG, "Adding device " << info.deviceName << " for driver " << outputDriver->GetName());
            collected.emplace_back(info, outputDriver);
        }
    }

    return collected;
}
// Configures @driver for @device and @caps, then records the driver and the format as the
// active ones. Everything happens under the driver lock.
void TextToSpeechManager::SetActiveDevice(const std::shared_ptr<PCMOutputDriver>& driver, const DeviceInfo& device, const CapabilityInfo& caps)
{
    std::lock_guard<std::mutex> guard(m_driverLock);

    AWS_LOGSTREAM_DEBUG(CLASS_TAG, "Configuring device " << device.deviceName << " for driver " << driver->GetName()
            << " as the current active device.");

    // Let the driver configure itself first, then remember the selection.
    driver->SetActiveDevice(device, caps);

    m_activeDriver = driver;
    m_selectedCaps = caps;
}
// Queries Polly (synchronously) for its voices and returns them as (name, language name)
// pairs. On failure the error is logged and an empty list is returned.
Aws::Vector<std::pair<Aws::String, Aws::String>> TextToSpeechManager::ListAvailableVoices() const
{
    Aws::Vector<std::pair<Aws::String, Aws::String>> voices;

    DescribeVoicesRequest request;
    auto outcome = m_pollyClient->DescribeVoices(request);

    if (!outcome.IsSuccess())
    {
        AWS_LOGSTREAM_ERROR(CLASS_TAG, "Error while fetching voices. " << outcome.GetError().GetExceptionName()
                << " " << outcome.GetError().GetMessage());
        return voices;
    }

    for (auto& voice : outcome.GetResult().GetVoices())
    {
        voices.emplace_back(voice.GetName(), voice.GetLanguageName());
    }

    return voices;
}
// Maps the voice name to its VoiceId and stores it for subsequent synthesis requests.
void TextToSpeechManager::SetActiveVoice(const Aws::String& voice)
{
    AWS_LOGSTREAM_DEBUG(CLASS_TAG, "Setting active voice as: " << voice);

    const VoiceId selectedVoice = VoiceIdMapper::GetVoiceIdForName(voice);
    m_activeVoice = selectedVoice;
}
/**
 * Completion callback for the asynchronous SynthesizeSpeech request.
 *
 * On success the audio stream is read in chunks of at most BUFF_SIZE bytes and each chunk is
 * written to the active driver, bracketed by Prime() and Flush(), all under the driver lock.
 * On failure the Polly error is logged. In both cases the completion handler stored in
 * @context (if any) is invoked afterwards with the request text, the outcome and a flag
 * telling whether every chunk was accepted by the driver.
 *
 * Note: the result is taken out of @outcome (GetResultWithOwnership), so the handler receives
 * an outcome whose result has already been consumed.
 */
void TextToSpeechManager::OnPollySynthSpeechOutcomeRecieved(const Polly::PollyClient*, const Polly::Model::SynthesizeSpeechRequest& request,
        const Polly::Model::SynthesizeSpeechOutcome& outcome, const std::shared_ptr<const Aws::Client::AsyncCallerContext>& context) const
{
    bool played(false);

    if (outcome.IsSuccess())
    {
        auto result = const_cast<Polly::Model::SynthesizeSpeechOutcome&>(outcome).GetResultWithOwnership();
        auto& stream = result.GetAudioStream();
        AWS_LOGSTREAM_TRACE(CLASS_TAG, "Audio retrieved from Polly. " << result.GetContentType() << " with "
                << result.GetRequestCharacters() << " characters syntesized");

        unsigned char buffer[BUFF_SIZE];

        std::lock_guard<std::mutex> m(m_driverLock);
        if (m_activeDriver)
        {
            m_activeDriver->Prime();

            bool successfullyPlayed(true);
            while (stream && successfullyPlayed)
            {
                stream.read(reinterpret_cast<char*>(buffer), BUFF_SIZE);
                const auto read = stream.gcount();
                if (read <= 0)
                {
                    // End of stream reached exactly on a chunk boundary (or the stream was empty).
                    // Passing a zero-length buffer to the driver could be reported as a write
                    // failure and would wrongly mark a complete playback as failed.
                    break;
                }

                AWS_LOGSTREAM_TRACE(CLASS_TAG, "Writing " << read << " bytes to device.");
                successfullyPlayed = m_activeDriver->WriteBufferToDevice(buffer, (std::size_t)read);
            }

            played = successfullyPlayed;
            m_activeDriver->Flush();
        }
        else
        {
            AWS_LOGSTREAM_ERROR(CLASS_TAG, "Audio was retrieved from Polly but no active audio driver is configured.");
        }
    }
    else
    {
        AWS_LOGSTREAM_ERROR(CLASS_TAG, "Error while fetching audio from polly. " << outcome.GetError().GetExceptionName() << " "
                << outcome.GetError().GetMessage());
    }

    // The context was created by SendTextToOutputDevice as a SendTextCompletionHandlerCallbackContext.
    // static_pointer_cast performs a proper base-to-derived conversion; the previous C-style cast
    // reinterpreted one shared_ptr specialisation as another.
    const auto handlerContext = std::static_pointer_cast<const SendTextCompletionHandlerCallbackContext>(context);
    if (handlerContext && handlerContext->callback)
    {
        handlerContext->callback(request.GetText().c_str(), outcome, played);
    }
}
}
}

View File

@@ -0,0 +1,180 @@
/**
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0.
*/
#include <aws/text-to-speech/apple/CoreAudioPCMOutputDriver.h>
#include <aws/core/utils/logging/LogMacros.h>
#include <iostream>
namespace Aws
{
namespace TextToSpeech
{
// Tag passed to the logging macros in this file.
static const char* CLASS_TAG = "CoreAudioPCMOutputDriver";

// Starts without an audio queue; buffers are 4096 bytes each and three are allocated per queue.
// m_selectedCaps is value-initialized (zeroed) because InitDevice() reads it, and
// WriteBufferToDevice() calls InitDevice() even when SetActiveDevice() was never called;
// leaving it uninitialized would hand indeterminate data to AudioQueueNewOutput.
CoreAudioPCMOutputDriver::CoreAudioPCMOutputDriver()
    : m_selectedCaps(),
      m_audioQueue(nullptr),
      m_maxBufferSize(4096),
      m_bufferCount(3)
{
}

// Releases the audio queue and its buffers.
CoreAudioPCMOutputDriver::~CoreAudioPCMOutputDriver()
{
    CleanUp();
}
bool CoreAudioPCMOutputDriver::WriteBufferToDevice(const unsigned char* buffer , size_t size)
{
InitDevice();
bool success(true);
if(m_audioQueue)
{
for(size_t i = 0; i < size && success; i += m_maxBufferSize)
{
std::unique_lock<std::mutex> m(m_queueBufferLock);
while(m_bufferQueue.size() == 0)
{
AWS_LOGSTREAM_DEBUG(CLASS_TAG, " waiting on audio buffers to become available.");
m_queueReadySemaphore.wait(m, [this](){ return m_bufferQueue.size() > 0;});
AWS_LOGSTREAM_TRACE(CLASS_TAG, " an audio buffer has been released, waking up.");
}
if (!m_audioQueue)
{
AWS_LOGSTREAM_ERROR(CLASS_TAG, " audio queue has been cleaned up.");
return false;
}
if(m_bufferQueue.size() > 0)
{
AudioQueueBufferRef audioBuffer = m_bufferQueue.front();
m_bufferQueue.pop();
auto toCpy = (std::min)(m_maxBufferSize, size - i);
AWS_LOGSTREAM_TRACE(CLASS_TAG, " Writing " << toCpy << " bytes to audio device.");
memcpy(audioBuffer->mAudioData, buffer + i, toCpy);
audioBuffer->mAudioDataByteSize = static_cast<UInt32>(toCpy);
auto errorCode = AudioQueueEnqueueBuffer(m_audioQueue, audioBuffer, 0, nullptr);
success = !errorCode;
if(!success)
{
AWS_LOGSTREAM_ERROR(CLASS_TAG, " error while queueing audio output. error code " << errorCode);
}
}
}
}
else
{
AWS_LOGSTREAM_ERROR(CLASS_TAG, " audio queue has not been initialized.");
return false;
}
return success;
}
// Reports a single "default" device offering mono 16-bit PCM at 16 kHz and at 8 kHz, in that order.
Aws::Vector<DeviceInfo> CoreAudioPCMOutputDriver::EnumerateDevices() const
{
    DeviceInfo defaultDevice;
    defaultDevice.deviceId = "default";
    defaultDevice.deviceName = "Default Audio Output Queue";

    const size_t offeredRates[] = { KHZ_16, KHZ_8 };
    for (const size_t rate : offeredRates)
    {
        CapabilityInfo capability;
        capability.sampleWidthBits = BIT_WIDTH_16;
        capability.channels = MONO;
        capability.sampleRate = rate;
        defaultDevice.capabilities.push_back(capability);
    }

    Aws::Vector<DeviceInfo> devices;
    devices.push_back(defaultDevice);
    return devices;
}
// Translates @caps into a packed, signed-integer linear PCM stream description and rebuilds
// the audio queue with it. The device argument is not used.
void CoreAudioPCMOutputDriver::SetActiveDevice(const DeviceInfo&, const CapabilityInfo& caps)
{
    // Format identification.
    m_selectedCaps.mFormatID = kAudioFormatLinearPCM;
    m_selectedCaps.mFormatFlags = kLinearPCMFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked;
    m_selectedCaps.mReserved = 0;

    // Values taken directly from the requested capability.
    m_selectedCaps.mSampleRate = caps.sampleRate;
    m_selectedCaps.mBitsPerChannel = caps.sampleWidthBits;
    m_selectedCaps.mChannelsPerFrame = caps.channels;

    // Derived sizes: one frame per packet.
    m_selectedCaps.mFramesPerPacket = 1;
    m_selectedCaps.mBytesPerFrame = caps.channels * (caps.sampleWidthBits / 8);
    m_selectedCaps.mBytesPerPacket = m_selectedCaps.mBytesPerFrame * m_selectedCaps.mFramesPerPacket;

    // Drop any queue built for the previous format and create a new one.
    CleanUp();
    InitDevice();
}
// Human-readable driver name.
const char* CoreAudioPCMOutputDriver::GetName() const
{
    static const char* const DRIVER_NAME = "CoreAudio (Apple Platform)";
    return DRIVER_NAME;
}
// Starts the audio queue before a batch of audio is written.
// The returned status is not inspected.
void CoreAudioPCMOutputDriver::Prime()
{
    (void)AudioQueueStart(m_audioQueue, nullptr);
}
// Flushes the audio queue after a batch of audio, then requests a non-immediate stop.
// The returned statuses are not inspected.
void CoreAudioPCMOutputDriver::Flush()
{
    (void)AudioQueueFlush(m_audioQueue);
    (void)AudioQueueStop(m_audioQueue, false);
}
void CoreAudioPCMOutputDriver::InitDevice()
{
if(!m_audioQueue)
{
AWS_LOGSTREAM_INFO(CLASS_TAG, " Initializing audio queue for sample rate: " << m_selectedCaps.mSampleRate);
AudioQueueNewOutput(&m_selectedCaps, &OnBufferReady, this, nullptr, kCFRunLoopCommonModes, 0, &m_audioQueue);
for (size_t i = 0; i < m_bufferCount; i++)
{
AWS_LOGSTREAM_TRACE(CLASS_TAG, " Allocating buffer of size: " << m_maxBufferSize);
AudioQueueBufferRef buf;
AudioQueueAllocateBuffer(m_audioQueue, static_cast<UInt32>(m_maxBufferSize), &buf);
m_bufferQueue.push(buf);
}
}
}
// Tears down the audio queue: requests a non-immediate stop, frees every buffer currently held
// in m_bufferQueue, disposes of the queue and resets the handle. Does nothing without a queue.
void CoreAudioPCMOutputDriver::CleanUp()
{
    if (!m_audioQueue)
    {
        return;
    }

    AWS_LOGSTREAM_INFO(CLASS_TAG, " Cleaning up audio queue");

    // Let buffers that are still queued finish processing before they are deleted.
    AudioQueueStop(m_audioQueue, false);

    // Held for the remainder of the function, including disposal of the queue.
    std::lock_guard<std::mutex> queueGuard(m_queueBufferLock);
    for (; m_bufferQueue.size() > 0; m_bufferQueue.pop())
    {
        AWS_LOGSTREAM_DEBUG(CLASS_TAG, " Cleaning up audio buffer");
        AudioQueueFreeBuffer(m_audioQueue, m_bufferQueue.front());
    }

    // Immediate disposal makes the audio queue release whatever buffers remain.
    AudioQueueDispose(m_audioQueue, true);
    m_audioQueue = nullptr;
}
void CoreAudioPCMOutputDriver::OnBufferReady(void *custom_data, AudioQueueRef, AudioQueueBufferRef buffer)
{
CoreAudioPCMOutputDriver* driver = (CoreAudioPCMOutputDriver*)custom_data;
{
std::unique_lock<std::mutex> m(driver->m_queueBufferLock);
driver->m_bufferQueue.push(buffer);
}
AWS_LOGSTREAM_DEBUG(CLASS_TAG, "Buffer free, notifying waiting threads.");
driver->m_queueReadySemaphore.notify_one();
}
}
}

View File

@@ -0,0 +1,107 @@
/**
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0.
*/
#include <aws/text-to-speech/linux/PulseAudioPCMOutputDriver.h>
#include <aws/core/utils/StringUtils.h>
#include <aws/core/utils/logging/LogMacros.h>
#include <pulse/error.h>
using namespace Aws::Utils;
namespace Aws
{
namespace TextToSpeech
{
// Application name reported to PulseAudio when the playback connection is opened.
static const char* APP_NAME = "Aws::Polly::TextToSpeech";
// Tag passed to the logging macros in this file.
static const char* CLASS_NAME = "PulseAudioPCMOutputDriver";

// Starts without a PulseAudio connection. m_selectedCaps is value-initialized (zeroed) because
// InitDevice() passes it to pa_simple_new, and WriteBufferToDevice() calls InitDevice() even
// when SetActiveDevice() was never called; leaving it uninitialized would hand indeterminate
// data to PulseAudio.
PulseAudioPCMOutputDriver::PulseAudioPCMOutputDriver()
    : m_driver(nullptr),
      m_selectedCaps()
{
}

// Closes the PulseAudio connection, if one is open.
PulseAudioPCMOutputDriver::~PulseAudioPCMOutputDriver()
{
    if(m_driver)
    {
        pa_simple_free(m_driver);
        m_driver = nullptr;
    }
}
bool PulseAudioPCMOutputDriver::WriteBufferToDevice(const unsigned char* buffer, size_t size)
{
InitDevice();
if(m_driver)
{
int error(-1);
if(pa_simple_write(m_driver, buffer, size, &error) < 0)
{
AWS_LOGSTREAM_ERROR(CLASS_NAME, " error writing buffer to output device " << pa_strerror(error));
return false;
}
return true;
}
return false;
}
// Reports a single default device offering mono 16-bit PCM at 16 kHz and at 8 kHz, in that order.
Aws::Vector<DeviceInfo> PulseAudioPCMOutputDriver::EnumerateDevices() const
{
    DeviceInfo defaultDevice;
    defaultDevice.deviceId = "0";
    defaultDevice.deviceName = "default audio output device";

    const size_t offeredRates[] = { KHZ_16, KHZ_8 };
    for (const size_t rate : offeredRates)
    {
        CapabilityInfo capability;
        capability.channels = MONO;
        capability.sampleRate = rate;
        capability.sampleWidthBits = BIT_WIDTH_16;
        defaultDevice.capabilities.push_back(capability);
    }

    Aws::Vector<DeviceInfo> devices;
    devices.push_back(defaultDevice);
    return devices;
}
// Records the device, converts the capability into a PulseAudio sample spec (always signed
// 16-bit little-endian) and reopens the connection with that spec.
void PulseAudioPCMOutputDriver::SetActiveDevice(const DeviceInfo& deviceInfo, const CapabilityInfo& capabilityInfo)
{
    m_activeDevice = deviceInfo;

    m_selectedCaps.format = PA_SAMPLE_S16LE;
    m_selectedCaps.rate = static_cast<uint32_t>(capabilityInfo.sampleRate);
    m_selectedCaps.channels = static_cast<uint8_t>(capabilityInfo.channels);

    // Close any connection that was opened with the previous spec.
    if (m_driver != nullptr)
    {
        pa_simple_free(m_driver);
        m_driver = nullptr;
    }

    InitDevice();
}
// Human-readable driver name.
const char* PulseAudioPCMOutputDriver::GetName() const
{
    static const char* const DRIVER_NAME = "Linux (Pulse Audio)";
    return DRIVER_NAME;
}
void PulseAudioPCMOutputDriver::InitDevice()
{
if (!m_driver)
{
int errorCode = -1;
m_driver = pa_simple_new(nullptr, APP_NAME, PA_STREAM_PLAYBACK, NULL, "playback", &m_selectedCaps, nullptr, nullptr, &errorCode);
if(!m_driver)
{
AWS_LOGSTREAM_ERROR(CLASS_NAME, " error initializing device " << pa_strerror(errorCode));
}
}
}
}
}

View File

@@ -0,0 +1,193 @@
/**
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0.
*/
#include <aws/text-to-speech/windows/WaveOutPCMOutputDriver.h>
#include <aws/core/utils/StringUtils.h>
#include <aws/core/utils/logging/LogMacros.h>
#include <windows.h>
using namespace Aws::Utils;
/**
 * Callback registered with waveOutOpen.
 *
 * On WOM_DONE, dwParam1 carries the WAVEHDR that finished playing; the header
 * is unprepared and both its sample buffer and the header itself are released.
 * WOM_OPEN, WOM_CLOSE and any other message are ignored.
 *
 * NOTE(review): Microsoft's waveOutProc documentation restricts which system
 * functions may be called from inside the callback; waveOutUnprepareHeader
 * does not appear to be on that list - confirm, and consider deferring the
 * cleanup to another thread if it is not.
 */
static void CALLBACK waveOutProc(HWAVEOUT waveOut, UINT uMsg, DWORD_PTR, DWORD_PTR dwParam1, DWORD_PTR)
{
    if (uMsg != WOM_DONE)
    {
        return;
    }

    WAVEHDR* finishedHeader = reinterpret_cast<WAVEHDR*>(dwParam1);
    waveOutUnprepareHeader(waveOut, finishedHeader, sizeof(WAVEHDR));
    // Release in the same order as before: sample buffer first, then its header.
    Aws::DeleteArray(finishedHeader->lpData);
    Aws::Delete(finishedHeader);
}
namespace Aws
{
namespace TextToSpeech
{
// Tag passed to the AWS logging macros and to Aws::New / Aws::NewArray allocations made by this driver.
static const char* CLASS_TAG = "WaveOutPCMOutputDriver";
/**
 * Creates a driver with no wave-out device opened.
 * The device is opened on demand by InitDevice().
 */
WaveOutPCMOutputDriver::WaveOutPCMOutputDriver() : m_waveOut(nullptr)
{
    // InitDevice() decides whether to open the device by reading m_isInit, so
    // the flag needs a defined value before the first WriteBufferToDevice() or
    // SetActiveDevice() call. Assigned in the body (rather than the initializer
    // list) so the result does not depend on member declaration order.
    m_isInit = false;
}
/**
 * Closes the wave-out device if one is open.
 * The result of waveOutClose is not checked.
 */
WaveOutPCMOutputDriver::~WaveOutPCMOutputDriver()
{
    // No device was opened: nothing to close.
    if (!m_waveOut)
    {
        return;
    }

    waveOutClose(m_waveOut);
    m_waveOut = nullptr;
}
bool WaveOutPCMOutputDriver::WriteBufferToDevice(const unsigned char* buffer, size_t size)
{
InitDevice();
WAVEHDR* waveHdr = Aws::New<WAVEHDR>(CLASS_TAG);
char* buf = Aws::NewArray<char>(size, CLASS_TAG);
memcpy(buf, buffer, size);
waveHdr->lpData = buf;
waveHdr->dwBufferLength = static_cast<DWORD>(size);
waveHdr->dwFlags = 0;
waveHdr->dwLoops = 0;
waveHdr->dwUser = NULL;
std::lock_guard<std::recursive_mutex> m(m_driverLock);
if (m_waveOut)
{
auto res = waveOutPrepareHeader(m_waveOut, waveHdr, sizeof(WAVEHDR));
if (res != MMSYSERR_NOERROR)
{
AWS_LOGSTREAM_ERROR(CLASS_TAG, "Error code " << res << " returned from waveOutPrepareHeader");
return false;
}
res = waveOutWrite(m_waveOut, waveHdr, sizeof(WAVEHDR));
if (res != MMSYSERR_NOERROR)
{
AWS_LOGSTREAM_ERROR(CLASS_TAG, "Error code " << res << " returned from waveOutWrite");
return false;
}
}
return true;
}
/**
 * @return the fixed, human-readable name of this driver.
 */
const char* WaveOutPCMOutputDriver::GetName() const
{
    static const char* const DRIVER_NAME = "Win32 WaveOut";
    return DRIVER_NAME;
}
void WaveOutPCMOutputDriver::InitDevice()
{
if (!m_isInit)
{
AWS_LOGSTREAM_INFO(CLASS_TAG, "Initializing device " << m_activeDevice.deviceName);
std::lock_guard<std::recursive_mutex> m(m_driverLock);
if (m_waveOut)
{
AWS_LOGSTREAM_TRACE(CLASS_TAG, "Cleaning up current device ");
waveOutClose(m_waveOut);
m_waveOut = nullptr;
}
WAVEFORMATEX format;
format.nChannels = static_cast<WORD>(m_selectedCaps.channels);
format.nSamplesPerSec = static_cast<DWORD>(m_selectedCaps.sampleRate);
format.wBitsPerSample = static_cast<WORD>(m_selectedCaps.sampleWidthBits);
format.wFormatTag = WAVE_FORMAT_PCM;
format.nBlockAlign = (format.nChannels * format.wBitsPerSample) / 8;
format.cbSize = 0;
format.nAvgBytesPerSec = format.nSamplesPerSec * format.nBlockAlign;
if (m_activeDevice.deviceId.empty())
{
AWS_LOGSTREAM_INFO(CLASS_TAG, "No device configured, letting windows figure out the best default.");
auto res = waveOutOpen(&m_waveOut, WAVE_MAPPER, &format, (DWORD_PTR)&waveOutProc, NULL,
CALLBACK_FUNCTION | WAVE_ALLOWSYNC | WAVE_MAPPED_DEFAULT_COMMUNICATION_DEVICE);
m_isInit = !res;
}
else
{
unsigned id = static_cast<unsigned>(StringUtils::ConvertToInt32(m_activeDevice.deviceId.c_str()));
auto res = waveOutOpen(&m_waveOut, id, &format, (DWORD_PTR)&waveOutProc, NULL,
CALLBACK_FUNCTION | WAVE_ALLOWSYNC);
m_isInit = !res;
}
if (!m_isInit)
{
AWS_LOGSTREAM_ERROR(CLASS_TAG, "Failed to initialize device");
}
}
}
/**
 * Selects the device and capabilities used for playback and reopens the device.
 *
 * @param device device to remember as the active one.
 * @param caps   capabilities (channels, sample rate, sample width) to play with.
 */
void WaveOutPCMOutputDriver::SetActiveDevice(const DeviceInfo& device, const CapabilityInfo& caps)
{
    // m_driverLock is a recursive mutex, so InitDevice() can take it again
    // below while this guard is still held.
    std::lock_guard<std::recursive_mutex> guard(m_driverLock);

    m_activeDevice = device;
    m_selectedCaps = caps;

    // Clearing the flag makes InitDevice() close the current device and open
    // one with the new settings.
    m_isInit = false;
    InitDevice();
}
/**
 * Lists the wave-out devices whose capabilities could be queried.
 *
 * Each entry uses the device index as its id and the name from the queried
 * capabilities. A device that reports WAVE_FORMAT_1M16 is advertised with
 * MONO / BIT_WIDTH_16 at KHZ_8 and KHZ_16; one that reports WAVE_FORMAT_2M16
 * additionally gets MONO / BIT_WIDTH_16 at KHZ_22_5. Devices for which
 * waveOutGetDevCapsA fails are skipped.
 *
 * @return the discovered devices, in index order.
 */
Aws::Vector<DeviceInfo> WaveOutPCMOutputDriver::EnumerateDevices() const
{
    Aws::Vector<DeviceInfo> found;

    const UINT total = waveOutGetNumDevs();
    for (UINT index = 0; index < total; ++index)
    {
        WAVEOUTCAPSA queried;
        if (waveOutGetDevCapsA(index, &queried, sizeof(WAVEOUTCAPSA)) != MMSYSERR_NOERROR)
        {
            continue;
        }

        DeviceInfo entry;
        entry.deviceId = StringUtils::to_string(index);
        entry.deviceName = queried.szPname;

        const bool has1M16 = (queried.dwFormats & WAVE_FORMAT_1M16) == WAVE_FORMAT_1M16;
        const bool has2M16 = (queried.dwFormats & WAVE_FORMAT_2M16) == WAVE_FORMAT_2M16;

        if (has1M16)
        {
            // Two entries that differ only in sample rate.
            CapabilityInfo monoCaps;
            monoCaps.channels = MONO;
            monoCaps.sampleWidthBits = BIT_WIDTH_16;
            monoCaps.sampleRate = KHZ_8;
            entry.capabilities.push_back(monoCaps);
            monoCaps.sampleRate = KHZ_16;
            entry.capabilities.push_back(monoCaps);
        }

        if (has2M16)
        {
            CapabilityInfo monoCaps;
            monoCaps.channels = MONO;
            monoCaps.sampleWidthBits = BIT_WIDTH_16;
            monoCaps.sampleRate = KHZ_22_5;
            entry.capabilities.push_back(monoCaps);
        }

        found.push_back(entry);
    }

    return found;
}
}
}