Update to 3.18.0

This commit is contained in:
DrKLO 2017-03-31 02:58:05 +03:00
parent 64e8ec3fbd
commit 6a1cf64f6f
2548 changed files with 138705 additions and 66425 deletions

3
.gitmodules vendored Normal file
View File

@ -0,0 +1,3 @@
[submodule "TMessagesProj/jni/libtgvoip"]
path = TMessagesProj/jni/libtgvoip
url = https://github.com/grishka/libtgvoip

View File

@ -9,19 +9,22 @@ configurations {
}
dependencies {
compile 'com.google.android.gms:play-services-gcm:9.6.1'
compile 'com.google.android.gms:play-services-maps:9.6.1'
compile 'com.google.android.gms:play-services-vision:9.6.1'
compile 'com.android.support:support-core-ui:24.2.1'
compile 'com.android.support:support-compat:24.2.1'
compile 'com.android.support:support-core-utils:24.2.1'
compile 'net.hockeyapp.android:HockeySDK:4.0.1'
compile 'com.google.android.gms:play-services-gcm:10.2.0'
compile 'com.google.android.gms:play-services-maps:10.2.0'
compile 'com.google.android.gms:play-services-vision:10.2.0'
compile 'com.android.support:support-core-ui:25.3.0'
compile 'com.android.support:support-compat:25.3.0'
compile 'com.android.support:support-core-utils:25.3.0'
compile 'com.android.support:support-v13:25.3.0'
compile 'com.android.support:palette-v7:25.3.0'
compile 'net.hockeyapp.android:HockeySDK:4.1.2'
compile 'com.googlecode.mp4parser:isoparser:1.0.6'
compile 'com.stripe:stripe-android:2.0.2'
}
android {
compileSdkVersion 24
buildToolsVersion '24.0.2'
compileSdkVersion 25
buildToolsVersion '25.0.2'
useLibrary 'org.apache.http.legacy'
defaultConfig.applicationId = "org.telegram.messenger"
@ -34,6 +37,10 @@ android {
}
}
dexOptions {
jumboMode = true
}
compileOptions {
sourceCompatibility JavaVersion.VERSION_1_7
targetCompatibility JavaVersion.VERSION_1_7
@ -70,6 +77,7 @@ android {
jniDebuggable false
signingConfig signingConfigs.release
minifyEnabled false
shrinkResources false
proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro'
}
@ -80,7 +88,7 @@ android {
}
}
defaultConfig.versionCode = 851
defaultConfig.versionCode = 957
sourceSets.debug {
manifest.srcFile 'config/debug/AndroidManifest.xml'
@ -107,20 +115,52 @@ android {
}
versionCode = 1
}
fat {
x86_SDK23 {
ndk {
abiFilter "x86"
}
sourceSets.debug {
manifest.srcFile 'config/debug/AndroidManifest_SDK23.xml'
}
sourceSets.release {
manifest.srcFile 'config/release/AndroidManifest_SDK23.xml'
}
minSdkVersion 23
versionCode = 4
}
armv7_SDK23 {
ndk {
abiFilter "armeabi-v7a"
}
sourceSets.debug {
manifest.srcFile 'config/debug/AndroidManifest_SDK23.xml'
}
sourceSets.release {
manifest.srcFile 'config/release/AndroidManifest_SDK23.xml'
}
minSdkVersion 23
versionCode = 3
}
fat {
sourceSets.debug {
manifest.srcFile 'config/debug/AndroidManifest_SDK23.xml'
}
sourceSets.release {
manifest.srcFile 'config/release/AndroidManifest_SDK23.xml'
}
versionCode = 5
}
}
applicationVariants.all { variant ->
def abiVersion = variant.productFlavors.get(0).versionCode
variant.mergedFlavor.versionCode = defaultConfig.versionCode * 10 + abiVersion;
variant.mergedFlavor.versionCode = defaultConfig.versionCode * 10 + abiVersion
}
defaultConfig {
minSdkVersion 14
targetSdkVersion 24
versionName "3.13.1"
targetSdkVersion 25
versionName "3.18.0"
externalNativeBuild {
ndkBuild {

View File

@ -0,0 +1,87 @@
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:tools="http://schemas.android.com/tools"
package="org.telegram.messenger"
android:installLocation="auto">
<uses-feature android:name="android.hardware.location.gps" android:required="false" />
<uses-feature android:name="android.hardware.location.network" android:required="false" />
<uses-feature android:name="android.hardware.location" android:required="false" />
<uses-feature android:name="android.hardware.LOCATION" android:required="false" />
<uses-permission android:name="com.google.android.c2dm.permission.RECEIVE" />
<uses-permission android:name="org.telegram.messenger.permission.MAPS_RECEIVE"/>
<uses-permission android:name="org.telegram.messenger.permission.C2D_MESSAGE" />
<uses-permission android:name="com.google.android.providers.gsf.permission.READ_GSERVICES"/>
<uses-permission android:name="android.permission.ACCESS_COARSE_LOCATION" />
<uses-permission android:name="android.permission.ACCESS_FINE_LOCATION" />
<uses-permission android:name="android.permission.CALL_PHONE" />
<uses-permission android:name="android.permission.READ_CALL_LOG" />
<uses-permission android:name="android.permission.WRITE_CALL_LOG" />
<permission android:name="org.telegram.messenger.permission.MAPS_RECEIVE" android:protectionLevel="signature"/>
<permission android:name="org.telegram.messenger.permission.C2D_MESSAGE" android:protectionLevel="signature" />
<application
android:allowBackup="false"
android:icon="@drawable/ic_launcher"
android:label="@string/AppNameBeta"
android:theme="@style/Theme.TMessages.Start"
android:name=".ApplicationLoader"
android:hardwareAccelerated="@bool/useHardwareAcceleration"
android:largeHeap="true">
<meta-data android:name="com.google.android.maps.v2.API_KEY" android:value="AIzaSyA-t0jLPjUt2FxrA8VPK2EiYHcYcboIR6k" />
<activity android:name="net.hockeyapp.android.UpdateActivity" />
<receiver
android:name="com.google.android.gms.gcm.GcmReceiver"
android:exported="true"
android:permission="com.google.android.c2dm.permission.SEND" >
<intent-filter>
<action android:name="com.google.android.c2dm.intent.RECEIVE" />
<action android:name="com.google.android.c2dm.intent.REGISTRATION" />
<category android:name="org.telegram.messenger" />
</intent-filter>
</receiver>
<service
android:name=".GcmPushListenerService"
android:exported="false" >
<intent-filter>
<action android:name="com.google.android.c2dm.intent.RECEIVE" />
</intent-filter>
</service>
<service
android:name=".GcmInstanceIDListenerService"
android:exported="false">
<intent-filter>
<action android:name="com.google.android.gms.iid.InstanceID" />
</intent-filter>
</service>
<service
android:name=".GcmRegistrationIntentService"
android:exported="false">
</service>
<uses-library android:name="com.google.android.maps" android:required="false"/>
<receiver
tools:replace="android:enabled"
android:name="com.google.android.gms.measurement.AppMeasurementReceiver"
android:enabled="false">
<intent-filter>
<action android:name="com.google.android.gms.measurement.UPLOAD" />
</intent-filter>
</receiver>
<service
tools:replace="android:enabled"
android:name="com.google.android.gms.measurement.AppMeasurementService"
android:enabled="false"
android:exported="false" />
</application>
</manifest>

View File

@ -0,0 +1,86 @@
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:tools="http://schemas.android.com/tools"
package="org.telegram.messenger"
android:installLocation="auto">
<uses-feature android:name="android.hardware.location.gps" android:required="false" />
<uses-feature android:name="android.hardware.location.network" android:required="false" />
<uses-feature android:name="android.hardware.location" android:required="false" />
<uses-feature android:name="android.hardware.LOCATION" android:required="false" />
<uses-permission android:name="com.google.android.c2dm.permission.RECEIVE" />
<uses-permission android:name="org.telegram.messenger.permission.MAPS_RECEIVE"/>
<uses-permission android:name="org.telegram.messenger.permission.C2D_MESSAGE" />
<uses-permission android:name="com.google.android.providers.gsf.permission.READ_GSERVICES"/>
<uses-permission android:name="android.permission.ACCESS_COARSE_LOCATION" />
<uses-permission android:name="android.permission.ACCESS_FINE_LOCATION" />
<uses-permission android:name="android.permission.CALL_PHONE" />
<uses-permission android:name="android.permission.READ_CALL_LOG" />
<uses-permission android:name="android.permission.WRITE_CALL_LOG" />
<permission android:name="org.telegram.messenger.permission.MAPS_RECEIVE" android:protectionLevel="signature"/>
<permission android:name="org.telegram.messenger.permission.C2D_MESSAGE" android:protectionLevel="signature" />
<application
android:allowBackup="false"
android:icon="@drawable/ic_launcher"
android:label="@string/AppName"
android:theme="@style/Theme.TMessages.Start"
android:name=".ApplicationLoader"
android:hardwareAccelerated="@bool/useHardwareAcceleration"
android:largeHeap="true">
<meta-data android:name="com.google.android.maps.v2.API_KEY" android:value="AIzaSyA-t0jLPjUt2FxrA8VPK2EiYHcYcboIR6k" />
<activity android:name="net.hockeyapp.android.UpdateActivity" />
<receiver
android:name="com.google.android.gms.gcm.GcmReceiver"
android:exported="true"
android:permission="com.google.android.c2dm.permission.SEND" >
<intent-filter>
<action android:name="com.google.android.c2dm.intent.RECEIVE" />
<action android:name="com.google.android.c2dm.intent.REGISTRATION" />
<category android:name="org.telegram.messenger" />
</intent-filter>
</receiver>
<service
android:name=".GcmPushListenerService"
android:exported="false" >
<intent-filter>
<action android:name="com.google.android.c2dm.intent.RECEIVE" />
</intent-filter>
</service>
<service
android:name=".GcmInstanceIDListenerService"
android:exported="false">
<intent-filter>
<action android:name="com.google.android.gms.iid.InstanceID" />
</intent-filter>
</service>
<service
android:name=".GcmRegistrationIntentService"
android:exported="false">
</service>
<uses-library android:name="com.google.android.maps" android:required="false"/>
<receiver
tools:replace="android:enabled"
android:name="com.google.android.gms.measurement.AppMeasurementReceiver"
android:enabled="false">
<intent-filter>
<action android:name="com.google.android.gms.measurement.UPLOAD" />
</intent-filter>
</receiver>
<service
tools:replace="android:enabled"
android:name="com.google.android.gms.measurement.AppMeasurementService"
android:enabled="false"
android:exported="false" />
</application>
</manifest>

View File

@ -108,6 +108,64 @@ include $(BUILD_STATIC_LIBRARY)
include $(CLEAR_VARS)
LOCAL_MODULE := WebRtcAec
LOCAL_SRC_FILES := ./libtgvoip/external/libWebRtcAec_android_$(TARGET_ARCH_ABI).a
include $(PREBUILT_STATIC_LIBRARY)
include $(CLEAR_VARS)
LOCAL_MODULE := voip
LOCAL_CPPFLAGS := -Wall -std=c++11 -DANDROID -finline-functions -ffast-math -Os -fno-strict-aliasing -O3
LOCAL_CFLAGS := -O3 -DUSE_KISS_FFT -fexceptions
ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
# LOCAL_CPPFLAGS += -mfloat-abi=softfp -mfpu=neon
# LOCAL_CFLAGS += -mfloat-abi=softfp -mfpu=neon -DFLOATING_POINT
# LOCAL_ARM_NEON := true
else
LOCAL_CFLAGS += -DFIXED_POINT
ifeq ($(TARGET_ARCH_ABI),armeabi)
# LOCAL_CPPFLAGS += -mfloat-abi=softfp -mfpu=neon
# LOCAL_CFLAGS += -mfloat-abi=softfp -mfpu=neon
else
ifeq ($(TARGET_ARCH_ABI),x86)
endif
endif
endif
MY_DIR := libtgvoip
LOCAL_C_INCLUDES := jni/opus/include jni/boringssl/include/
LOCAL_SRC_FILES := \
./libtgvoip/logging.cpp \
./libtgvoip/VoIPController.cpp \
./libtgvoip/BufferInputStream.cpp \
./libtgvoip/BufferOutputStream.cpp \
./libtgvoip/BlockingQueue.cpp \
./libtgvoip/audio/AudioInput.cpp \
./libtgvoip/os/android/AudioInputOpenSLES.cpp \
./libtgvoip/MediaStreamItf.cpp \
./libtgvoip/audio/AudioOutput.cpp \
./libtgvoip/OpusEncoder.cpp \
./libtgvoip/os/android/AudioOutputOpenSLES.cpp \
./libtgvoip/JitterBuffer.cpp \
./libtgvoip/OpusDecoder.cpp \
./libtgvoip/BufferPool.cpp \
./libtgvoip/os/android/OpenSLEngineWrapper.cpp \
./libtgvoip/os/android/AudioInputAndroid.cpp \
./libtgvoip/os/android/AudioOutputAndroid.cpp \
./libtgvoip/EchoCanceller.cpp \
./libtgvoip/CongestionControl.cpp \
./libtgvoip/VoIPServerConfig.cpp
include $(BUILD_STATIC_LIBRARY)
include $(CLEAR_VARS)
LOCAL_CPPFLAGS := -Wall -std=c++11 -DANDROID -frtti -DHAVE_PTHREAD -finline-functions -ffast-math -O0
LOCAL_C_INCLUDES += ./jni/boringssl/include/
LOCAL_ARM_MODE := arm
@ -237,13 +295,13 @@ include $(BUILD_STATIC_LIBRARY)
include $(CLEAR_VARS)
LOCAL_PRELINK_MODULE := false
LOCAL_MODULE := tmessages.24
LOCAL_MODULE := tmessages.26
LOCAL_CFLAGS := -w -std=c11 -Os -DNULL=0 -DSOCKLEN_T=socklen_t -DLOCALE_NOT_USED -D_LARGEFILE_SOURCE=1 -D_FILE_OFFSET_BITS=64
LOCAL_CFLAGS += -Drestrict='' -D__EMX__ -DOPUS_BUILD -DFIXED_POINT -DUSE_ALLOCA -DHAVE_LRINT -DHAVE_LRINTF -fno-math-errno
LOCAL_CFLAGS += -DANDROID_NDK -DDISABLE_IMPORTGL -fno-strict-aliasing -fprefetch-loop-arrays -DAVOID_TABLES -DANDROID_TILE_BASED_DECODE -DANDROID_ARMV6_IDCT -ffast-math -D__STDC_CONSTANT_MACROS
LOCAL_CPPFLAGS := -DBSD=1 -ffast-math -Os -funroll-loops -std=c++11
LOCAL_LDLIBS := -ljnigraphics -llog -lz -latomic
LOCAL_STATIC_LIBRARIES := webp sqlite tgnet breakpad avformat avcodec avutil
LOCAL_LDLIBS := -ljnigraphics -llog -lz -latomic -lOpenSLES
LOCAL_STATIC_LIBRARIES := webp sqlite tgnet breakpad avformat avcodec avutil voip WebRtcAec
LOCAL_SRC_FILES := \
./opus/src/opus.c \
@ -261,6 +319,14 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
LOCAL_ARM_MODE := arm
LOCAL_CPPFLAGS += -DLIBYUV_NEON
LOCAL_CFLAGS += -DLIBYUV_NEON
LOCAL_CFLAGS += -DOPUS_HAVE_RTCD -DOPUS_ARM_ASM
LOCAL_SRC_FILES += \
# ./opus/celt/arm/celt_neon_intr.c \
# ./opus/silk/arm/NSQ_neon.c \
./opus/silk/arm/arm_silk_map.c
# LOCAL_SRC_FILES += ./opus/celt/arm/celt_pitch_xcorr_arm-gnu.S
else
ifeq ($(TARGET_ARCH_ABI),armeabi)
LOCAL_ARM_MODE := arm
@ -270,8 +336,25 @@ else
LOCAL_CFLAGS += -Dx86fix
LOCAL_CPPFLAGS += -Dx86fix
LOCAL_ARM_MODE := arm
LOCAL_SRC_FILE += \
./libyuv/source/row_x86.asm
# LOCAL_SRC_FILES += \
# ./libyuv/source/row_x86.asm
# LOCAL_SRC_FILES += \
# ./opus/celt/x86/celt_lpc_sse.c \
# ./opus/celt/x86/pitch_sse.c \
# ./opus/celt/x86/pitch_sse2.c \
# ./opus/celt/x86/pitch_sse4_1.c \
# ./opus/celt/x86/vq_sse2.c \
# ./opus/celt/x86/x86_celt_map.c \
# ./opus/celt/x86/x86cpu.c \
# ./opus/silk/fixed/x86/burg_modified_FIX_sse.c \
# ./opus/silk/fixed/x86/vector_ops_FIX_sse.c \
# ./opus/silk/x86/NSQ_del_dec_sse.c \
# ./opus/silk/x86/NSQ_sse.c \
# ./opus/silk/x86/VAD_sse.c \
# ./opus/silk/x86/VQ_WMat_sse.c \
# ./opus/silk/x86/x86_silk_map.c
endif
endif
endif
@ -352,7 +435,8 @@ LOCAL_SRC_FILES += \
./opus/silk/stereo_decode_pred.c \
./opus/silk/stereo_encode_pred.c \
./opus/silk/stereo_find_predictor.c \
./opus/silk/stereo_quant_pred.c
./opus/silk/stereo_quant_pred.c \
./opus/silk/LPC_fit.c
LOCAL_SRC_FILES += \
./opus/silk/fixed/LTP_analysis_filter_FIX.c \
@ -364,12 +448,10 @@ LOCAL_SRC_FILES += \
./opus/silk/fixed/find_pitch_lags_FIX.c \
./opus/silk/fixed/find_pred_coefs_FIX.c \
./opus/silk/fixed/noise_shape_analysis_FIX.c \
./opus/silk/fixed/prefilter_FIX.c \
./opus/silk/fixed/process_gains_FIX.c \
./opus/silk/fixed/regularize_correlations_FIX.c \
./opus/silk/fixed/residual_energy16_FIX.c \
./opus/silk/fixed/residual_energy_FIX.c \
./opus/silk/fixed/solve_LS_FIX.c \
./opus/silk/fixed/warped_autocorrelation_FIX.c \
./opus/silk/fixed/apply_sine_window_FIX.c \
./opus/silk/fixed/autocorr_FIX.c \
@ -483,7 +565,8 @@ LOCAL_SRC_FILES += \
./gifvideo.cpp \
./SqliteWrapper.cpp \
./TgNetWrapper.cpp \
./NativeLoader.cpp
./NativeLoader.cpp \
./libtgvoip/client/android/tg_voip_jni.cpp
include $(BUILD_SHARED_LIBRARY)

View File

@ -1,4 +1,4 @@
APP_PLATFORM := android-9
APP_PLATFORM := android-14
APP_ABI := armeabi armeabi-v7a
NDK_TOOLCHAIN_VERSION := 4.9
APP_STL := gnustl_static

View File

@ -26,6 +26,8 @@ jmethodID jclass_ConnectionsManager_onLogout;
jmethodID jclass_ConnectionsManager_onConnectionStateChanged;
jmethodID jclass_ConnectionsManager_onInternalPushReceived;
jmethodID jclass_ConnectionsManager_onUpdateConfig;
jmethodID jclass_ConnectionsManager_onBytesSent;
jmethodID jclass_ConnectionsManager_onBytesReceived;
jint createLoadOpetation(JNIEnv *env, jclass c, jint dc_id, jlong id, jlong volume_id, jlong access_hash, jint local_id, jbyteArray encKey, jbyteArray encIv, jstring extension, jint version, jint size, jstring dest, jstring temp, jobject delegate) {
if (encKey != nullptr && encIv == nullptr || encKey == nullptr && encIv != nullptr || extension == nullptr || dest == nullptr || temp == nullptr) {
@ -168,7 +170,7 @@ void sendRequest(JNIEnv *env, jclass c, jint object, jobject onComplete, jobject
if (onQuickAck != nullptr) {
onQuickAck = env->NewGlobalRef(onQuickAck);
}
ConnectionsManager::getInstance().sendRequest(request, ([onComplete](TLObject *response, TL_error *error) {
ConnectionsManager::getInstance().sendRequest(request, ([onComplete](TLObject *response, TL_error *error, int32_t networkType) {
TL_api_response *resp = (TL_api_response *) response;
jint ptr = 0;
jint errorCode = 0;
@ -180,7 +182,7 @@ void sendRequest(JNIEnv *env, jclass c, jint object, jobject onComplete, jobject
errorText = jniEnv->NewStringUTF(error->text.c_str());
}
if (onComplete != nullptr) {
jniEnv->CallVoidMethod(onComplete, jclass_RequestDelegateInternal_run, ptr, errorCode, errorText);
jniEnv->CallVoidMethod(onComplete, jclass_RequestDelegateInternal_run, ptr, errorCode, errorText, networkType);
}
if (errorText != nullptr) {
jniEnv->DeleteLocalRef(errorText);
@ -246,8 +248,8 @@ void setUseIpv6(JNIEnv *env, jclass c, bool value) {
ConnectionsManager::getInstance().setUseIpv6(value);
}
void setNetworkAvailable(JNIEnv *env, jclass c, jboolean value) {
ConnectionsManager::getInstance().setNetworkAvailable(value);
void setNetworkAvailable(JNIEnv *env, jclass c, jboolean value, jint networkType) {
ConnectionsManager::getInstance().setNetworkAvailable(value, networkType);
}
void setPushConnectionEnabled(JNIEnv *env, jclass c, jboolean value) {
@ -289,9 +291,17 @@ class Delegate : public ConnectiosManagerDelegate {
void onInternalPushReceived() {
jniEnv->CallStaticVoidMethod(jclass_ConnectionsManager, jclass_ConnectionsManager_onInternalPushReceived);
}
void onBytesReceived(int32_t amount, int32_t networkType) {
jniEnv->CallStaticVoidMethod(jclass_ConnectionsManager, jclass_ConnectionsManager_onBytesReceived, amount, networkType);
}
void onBytesSent(int32_t amount, int32_t networkType) {
jniEnv->CallStaticVoidMethod(jclass_ConnectionsManager, jclass_ConnectionsManager_onBytesSent, amount, networkType);
}
};
void init(JNIEnv *env, jclass c, jint version, jint layer, jint apiId, jstring deviceModel, jstring systemVersion, jstring appVersion, jstring langCode, jstring configPath, jstring logPath, jint userId, jboolean enablePushConnection) {
void init(JNIEnv *env, jclass c, jint version, jint layer, jint apiId, jstring deviceModel, jstring systemVersion, jstring appVersion, jstring langCode, jstring configPath, jstring logPath, jint userId, jboolean enablePushConnection, jboolean hasNetwork, jint networkType) {
const char *deviceModelStr = env->GetStringUTFChars(deviceModel, 0);
const char *systemVersionStr = env->GetStringUTFChars(systemVersion, 0);
const char *appVersionStr = env->GetStringUTFChars(appVersion, 0);
@ -299,7 +309,7 @@ void init(JNIEnv *env, jclass c, jint version, jint layer, jint apiId, jstring d
const char *configPathStr = env->GetStringUTFChars(configPath, 0);
const char *logPathStr = env->GetStringUTFChars(logPath, 0);
ConnectionsManager::getInstance().init(version, layer, apiId, std::string(deviceModelStr), std::string(systemVersionStr), std::string(appVersionStr), std::string(langCodeStr), std::string(configPathStr), std::string(logPathStr), userId, true, enablePushConnection);
ConnectionsManager::getInstance().init(version, layer, apiId, std::string(deviceModelStr), std::string(systemVersionStr), std::string(appVersionStr), std::string(langCodeStr), std::string(configPathStr), std::string(logPathStr), userId, true, enablePushConnection, hasNetwork, networkType);
if (deviceModelStr != 0) {
env->ReleaseStringUTFChars(deviceModel, deviceModelStr);
@ -339,13 +349,13 @@ static JNINativeMethod ConnectionsManagerMethods[] = {
{"native_applyDatacenterAddress", "(ILjava/lang/String;I)V", (void *) applyDatacenterAddress},
{"native_getConnectionState", "()I", (void *) getConnectionState},
{"native_setUserId", "(I)V", (void *) setUserId},
{"native_init", "(IIILjava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;IZ)V", (void *) init},
{"native_init", "(IIILjava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;IZZI)V", (void *) init},
{"native_switchBackend", "()V", (void *) switchBackend},
{"native_pauseNetwork", "()V", (void *) pauseNetwork},
{"native_resumeNetwork", "(Z)V", (void *) resumeNetwork},
{"native_updateDcSettings", "()V", (void *) updateDcSettings},
{"native_setUseIpv6", "(Z)V", (void *) setUseIpv6},
{"native_setNetworkAvailable", "(Z)V", (void *) setNetworkAvailable},
{"native_setNetworkAvailable", "(ZI)V", (void *) setNetworkAvailable},
{"native_setPushConnectionEnabled", "(Z)V", (void *) setPushConnectionEnabled},
{"native_setJava", "(Z)V", (void *) setJava}
};
@ -381,7 +391,7 @@ extern "C" int registerNativeTgNetFunctions(JavaVM *vm, JNIEnv *env) {
if (jclass_RequestDelegateInternal == 0) {
return JNI_FALSE;
}
jclass_RequestDelegateInternal_run = env->GetMethodID(jclass_RequestDelegateInternal, "run", "(IILjava/lang/String;)V");
jclass_RequestDelegateInternal_run = env->GetMethodID(jclass_RequestDelegateInternal, "run", "(IILjava/lang/String;I)V");
if (jclass_RequestDelegateInternal_run == 0) {
return JNI_FALSE;
}
@ -447,6 +457,14 @@ extern "C" int registerNativeTgNetFunctions(JavaVM *vm, JNIEnv *env) {
if (jclass_ConnectionsManager_onUpdateConfig == 0) {
return JNI_FALSE;
}
jclass_ConnectionsManager_onBytesSent = env->GetStaticMethodID(jclass_ConnectionsManager, "onBytesSent", "(II)V");
if (jclass_ConnectionsManager_onBytesSent == 0) {
return JNI_FALSE;
}
jclass_ConnectionsManager_onBytesReceived = env->GetStaticMethodID(jclass_ConnectionsManager, "onBytesReceived", "(II)V");
if (jclass_ConnectionsManager_onBytesReceived == 0) {
return JNI_FALSE;
}
ConnectionsManager::getInstance().setDelegate(new Delegate());
return JNI_TRUE;

View File

@ -127,7 +127,7 @@ void ec_GFp_nistp_points_make_affine_internal(
*
* A left-shift followed by subtraction of the original value yields a new
* representation of the same value, using signed bits s_i = b_(i+1) - b_i.
* This representation from Booth's paper has since appeared in the
* This representation from Booth's paper has since onAppeared in the
* literature under a variety of different names including "reversed binary
* form", "alternating greedy expansion", "mutual opposite form", and
* "sign-alternating {+-1}-representation".

View File

@ -273,6 +273,5 @@ jint Java_org_telegram_ui_Components_AnimatedFileDrawable_getVideoFrame(JNIEnv *
return 1;
}
}
return 0;
}
}

@ -0,0 +1 @@
Subproject commit eb813e1d133fdfb96f5efe9c767e86136aab1133

View File

@ -58,16 +58,12 @@
# define S_MUL(a,b) MULT16_32_Q15(b, a)
# define C_MUL(m,a,b) \
do{ (m).r = SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
(m).i = ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0)
do{ (m).r = SUB32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
(m).i = ADD32_ovflw(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0)
# define C_MULC(m,a,b) \
do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
(m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)
# define C_MUL4(m,a,b) \
do{ (m).r = SHR32(SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)),2); \
(m).i = SHR32(ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)),2); }while(0)
do{ (m).r = ADD32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
(m).i = SUB32_ovflw(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)
# define C_MULBYSCALAR( c, s ) \
do{ (c).r = S_MUL( (c).r , s ) ;\
@ -81,17 +77,17 @@
DIVSCALAR( (c).i , div); }while (0)
#define C_ADD( res, a,b)\
do {(res).r=ADD32((a).r,(b).r); (res).i=ADD32((a).i,(b).i); \
do {(res).r=ADD32_ovflw((a).r,(b).r); (res).i=ADD32_ovflw((a).i,(b).i); \
}while(0)
#define C_SUB( res, a,b)\
do {(res).r=SUB32((a).r,(b).r); (res).i=SUB32((a).i,(b).i); \
do {(res).r=SUB32_ovflw((a).r,(b).r); (res).i=SUB32_ovflw((a).i,(b).i); \
}while(0)
#define C_ADDTO( res , a)\
do {(res).r = ADD32((res).r, (a).r); (res).i = ADD32((res).i,(a).i);\
do {(res).r = ADD32_ovflw((res).r, (a).r); (res).i = ADD32_ovflw((res).i,(a).i);\
}while(0)
#define C_SUBFROM( res , a)\
do {(res).r = ADD32((res).r,(a).r); (res).i = SUB32((res).i,(a).i); \
do {(res).r = ADD32_ovflw((res).r,(a).r); (res).i = SUB32_ovflw((res).i,(a).i); \
}while(0)
#if defined(OPUS_ARM_INLINE_ASM)
@ -101,6 +97,9 @@
#if defined(OPUS_ARM_INLINE_EDSP)
#include "arm/kiss_fft_armv5e.h"
#endif
#if defined(MIPSr1_ASM)
#include "mips/kiss_fft_mipsr1.h"
#endif
#else /* not FIXED_POINT*/

View File

@ -46,6 +46,14 @@
# endif
# endif
#if OPUS_GNUC_PREREQ(3, 0)
#define opus_likely(x) (__builtin_expect(!!(x), 1))
#define opus_unlikely(x) (__builtin_expect(!!(x), 0))
#else
#define opus_likely(x) (!!(x))
#define opus_unlikely(x) (!!(x))
#endif
#define CELT_SIG_SCALE 32768.f
#define celt_fatal(str) _celt_fatal(str, __FILE__, __LINE__);
@ -69,11 +77,8 @@ static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line)
#define IMUL32(a,b) ((a)*(b))
#define ABS(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute integer value. */
#define ABS16(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 16-bit value. */
#define MIN16(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 16-bit value. */
#define MAX16(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 16-bit value. */
#define ABS32(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 32-bit value. */
#define MIN32(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 32-bit value. */
#define MAX32(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 32-bit value. */
#define IMIN(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum int value. */
@ -81,6 +86,15 @@ static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line)
#define UADD32(a,b) ((a)+(b))
#define USUB32(a,b) ((a)-(b))
/* Set this if opus_int64 is a native type of the CPU. */
/* Assume that all LP64 architectures have fast 64-bit types; also x86_64
(which can be ILP32 for x32) and Win64 (which is LLP64). */
#if defined(__x86_64__) || defined(__LP64__) || defined(_WIN64)
#define OPUS_FAST_INT64 1
#else
#define OPUS_FAST_INT64 0
#endif
#define PRINT_MIPS(file)
#ifdef FIXED_POINT
@ -95,6 +109,9 @@ typedef opus_val32 celt_ener;
#define Q15ONE 32767
#define SIG_SHIFT 12
/* Safe saturation value for 32-bit signals. Should be less than
2^31*(1-0.85) to avoid blowing up on DC at deemphasis.*/
#define SIG_SAT (300000000)
#define NORM_SCALING 16384
@ -108,13 +125,22 @@ typedef opus_val32 celt_ener;
#define SCALEIN(a) (a)
#define SCALEOUT(a) (a)
#define ABS16(x) ((x) < 0 ? (-(x)) : (x))
#define ABS32(x) ((x) < 0 ? (-(x)) : (x))
static OPUS_INLINE opus_int16 SAT16(opus_int32 x) {
return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x;
}
#ifdef FIXED_DEBUG
#include "fixed_debug.h"
#else
#include "fixed_generic.h"
#ifdef OPUS_ARM_INLINE_EDSP
#ifdef OPUS_ARM_PRESUME_AARCH64_NEON_INTR
#include "arm/fixed_arm64.h"
#elif OPUS_ARM_INLINE_EDSP
#include "arm/fixed_armv5e.h"
#elif defined (OPUS_ARM_INLINE_ASM)
#include "arm/fixed_armv4.h"
@ -137,6 +163,22 @@ typedef float celt_sig;
typedef float celt_norm;
typedef float celt_ener;
#ifdef FLOAT_APPROX
/* This code should reliably detect NaN/inf even when -ffast-math is used.
Assumes IEEE 754 format. */
static OPUS_INLINE int celt_isnan(float x)
{
union {float f; opus_uint32 i;} in;
in.f = x;
return ((in.i>>23)&0xFF)==0xFF && (in.i&0x007FFFFF)!=0;
}
#else
#ifdef __FAST_MATH__
#error Cannot build libopus with -ffast-math unless FLOAT_APPROX is defined. This could result in crashes on extreme (e.g. NaN) input
#endif
#define celt_isnan(x) ((x)!=(x))
#endif
#define Q15ONE 1.0f
#define NORM_SCALING 1.f
@ -146,11 +188,16 @@ typedef float celt_ener;
#define VERY_LARGE16 1e15f
#define Q15_ONE ((opus_val16)1.f)
/* This appears to be the same speed as C99's fabsf() but it's more portable. */
#define ABS16(x) ((float)fabs(x))
#define ABS32(x) ((float)fabs(x))
#define QCONST16(x,bits) (x)
#define QCONST32(x,bits) (x)
#define NEG16(x) (-(x))
#define NEG32(x) (-(x))
#define NEG32_ovflw(x) (-(x))
#define EXTRACT16(x) (x)
#define EXTEND32(x) (x)
#define SHR16(a,shift) (a)
@ -167,6 +214,7 @@ typedef float celt_ener;
#define SATURATE16(x) (x)
#define ROUND16(a,shift) (a)
#define SROUND16(a,shift) (a)
#define HALF16(x) (.5f*(x))
#define HALF32(x) (.5f*(x))
@ -174,6 +222,8 @@ typedef float celt_ener;
#define SUB16(a,b) ((a)-(b))
#define ADD32(a,b) ((a)+(b))
#define SUB32(a,b) ((a)-(b))
#define ADD32_ovflw(a,b) ((a)+(b))
#define SUB32_ovflw(a,b) ((a)-(b))
#define MULT16_16_16(a,b) ((a)*(b))
#define MULT16_16(a,b) ((opus_val32)(a)*(opus_val32)(b))
#define MAC16_16(c,a,b) ((c)+(opus_val32)(a)*(opus_val32)(b))
@ -184,6 +234,7 @@ typedef float celt_ener;
#define MULT32_32_Q31(a,b) ((a)*(b))
#define MAC16_32_Q15(c,a,b) ((c)+(a)*(b))
#define MAC16_32_Q16(c,a,b) ((c)+(a)*(b))
#define MULT16_16_Q11_32(a,b) ((a)*(b))
#define MULT16_16_Q11(a,b) ((a)*(b))
@ -201,6 +252,8 @@ typedef float celt_ener;
#define SCALEIN(a) ((a)*CELT_SIG_SCALE)
#define SCALEOUT(a) ((a)*(1/CELT_SIG_SCALE))
#define SIG2WORD16(x) (x)
#endif /* !FIXED_POINT */
#ifndef GLOBAL_STACK_SIZE

View File

@ -1,7 +1,33 @@
#!/usr/bin/perl
# Copyright (C) 2002-2013 Xiph.org Foundation
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# - Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# - Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
my $bigend; # little/big endian
my $nxstack;
my $apple = 0;
my $symprefix = "";
$nxstack = 0;
@ -10,11 +36,16 @@ eval 'exec /usr/local/bin/perl -S $0 ${1+"$@"}'
while ($ARGV[0] =~ /^-/) {
$_ = shift;
last if /^--/;
if (/^-n/) {
last if /^--$/;
if (/^-n$/) {
$nflag++;
next;
}
if (/^--apple$/) {
$apple = 1;
$symprefix = "_";
next;
}
die "I don't recognize this switch: $_\\n";
}
$printit++ unless $nflag;
@ -25,6 +56,8 @@ $n=0;
$thumb = 0; # ARM mode by default, not Thumb.
@proc_stack = ();
printf (" .syntax unified\n");
LINE:
while (<>) {
@ -53,7 +86,7 @@ while (<>) {
s/\bINCLUDE[ \t]*([^ \t\n]+)/.include \"$1\"/;
s/\bGET[ \t]*([^ \t\n]+)/.include \"${ my $x=$1; $x =~ s|\.s|-gnu.S|; \$x }\"/;
s/\bIMPORT\b/.extern/;
s/\bEXPORT\b/.global/;
s/\bEXPORT\b\s*/.global $symprefix/;
s/^(\s+)\[/$1IF/;
s/^(\s+)\|/$1ELSE/;
s/^(\s+)\]/$1ENDIF/;
@ -109,7 +142,7 @@ while (<>) {
# won't match the original source file (we could use the .line
# directive, which is documented to be obsolete, but then gdb will
# show the wrong line in the translated source file).
s/$/; .arch armv7-a\n .fpu neon\n .object_arch armv4t/;
s/$/; .arch armv7-a\n .fpu neon\n .object_arch armv4t/ unless ($apple);
}
}
@ -131,9 +164,13 @@ while (<>) {
$prefix = "";
if ($proc)
{
$prefix = $prefix.sprintf("\t.type\t%s, %%function; ",$proc);
$prefix = $prefix.sprintf("\t.type\t%s, %%function; ",$proc) unless ($apple);
# Make sure we $prefix isn't empty here (for the $apple case).
# We handle mangling the label here, make sure it doesn't match
# the label handling below (if $prefix would be empty).
$prefix = "; ";
push(@proc_stack, $proc);
s/^[A-Za-z_\.]\w+/$&:/;
s/^[A-Za-z_\.]\w+/$symprefix$&:/;
}
$prefix = $prefix."\t.thumb_func; " if ($thumb);
s/\bPROC\b/@ $&/;
@ -146,7 +183,7 @@ while (<>) {
my $proc;
s/\bENDP\b/@ $&/;
$proc = pop(@proc_stack);
$_ = "\t.size $proc, .-$proc".$_ if ($proc);
$_ = "\t.size $proc, .-$proc".$_ if ($proc && !$apple);
}
s/\bSUBT\b/@ $&/;
s/\bDATA\b/@ $&/; # DATA directive is deprecated -- Asm guide, p.7-25
@ -311,6 +348,6 @@ while (<>) {
}
#If we had a code section, mark that this object doesn't need an executable
# stack.
if ($nxstack) {
if ($nxstack && !$apple) {
printf (" .section\t.note.GNU-stack,\"\",\%\%progbits\n");
}

View File

@ -30,10 +30,15 @@
#endif
#include "pitch.h"
#include "kiss_fft.h"
#include "mdct.h"
#if defined(OPUS_HAVE_RTCD)
# if defined(FIXED_POINT)
# if ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \
(defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \
(defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))
opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int , int) = {
celt_pitch_xcorr_c, /* ARMv4 */
@ -41,9 +46,98 @@ opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */
MAY_HAVE_NEON(celt_pitch_xcorr) /* NEON */
};
# else
# error "Floating-point implementation is not supported by ARM asm yet." \
"Reconfigure with --disable-rtcd or send patches."
# endif
# endif
# else /* !FIXED_POINT */
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)
void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int, int) = {
celt_pitch_xcorr_c, /* ARMv4 */
celt_pitch_xcorr_c, /* EDSP */
celt_pitch_xcorr_c, /* Media */
celt_pitch_xcorr_float_neon /* Neon */
};
# endif
# endif /* FIXED_POINT */
#if defined(FIXED_POINT) && defined(OPUS_HAVE_RTCD) && \
defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)
void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *y,
opus_val32 sum[4],
int len
) = {
xcorr_kernel_c, /* ARMv4 */
xcorr_kernel_c, /* EDSP */
xcorr_kernel_c, /* Media */
xcorr_kernel_neon_fixed, /* Neon */
};
#endif
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
# if defined(HAVE_ARM_NE10)
# if defined(CUSTOM_MODES)
int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
opus_fft_alloc_arch_c, /* ARMv4 */
opus_fft_alloc_arch_c, /* EDSP */
opus_fft_alloc_arch_c, /* Media */
opus_fft_alloc_arm_neon /* Neon with NE10 library support */
};
void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
opus_fft_free_arch_c, /* ARMv4 */
opus_fft_free_arch_c, /* EDSP */
opus_fft_free_arch_c, /* Media */
opus_fft_free_arm_neon /* Neon with NE10 */
};
# endif /* CUSTOM_MODES */
void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
const kiss_fft_cpx *fin,
kiss_fft_cpx *fout) = {
opus_fft_c, /* ARMv4 */
opus_fft_c, /* EDSP */
opus_fft_c, /* Media */
opus_fft_neon /* Neon with NE10 */
};
void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
const kiss_fft_cpx *fin,
kiss_fft_cpx *fout) = {
opus_ifft_c, /* ARMv4 */
opus_ifft_c, /* EDSP */
opus_ifft_c, /* Media */
opus_ifft_neon /* Neon with NE10 */
};
void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window,
int overlap, int shift,
int stride, int arch) = {
clt_mdct_forward_c, /* ARMv4 */
clt_mdct_forward_c, /* EDSP */
clt_mdct_forward_c, /* Media */
clt_mdct_forward_neon /* Neon with NE10 */
};
void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window,
int overlap, int shift,
int stride, int arch) = {
clt_mdct_backward_c, /* ARMv4 */
clt_mdct_backward_c, /* EDSP */
clt_mdct_backward_c, /* Media */
clt_mdct_backward_neon /* Neon with NE10 */
};
# endif /* HAVE_ARM_NE10 */
# endif /* OPUS_ARM_MAY_HAVE_NEON_INTR */
#endif /* OPUS_HAVE_RTCD */

View File

@ -37,11 +37,12 @@
#include "cpu_support.h"
#include "os_support.h"
#include "opus_types.h"
#include "arch.h"
#define OPUS_CPU_ARM_V4 (1)
#define OPUS_CPU_ARM_EDSP (1<<1)
#define OPUS_CPU_ARM_MEDIA (1<<2)
#define OPUS_CPU_ARM_NEON (1<<3)
#define OPUS_CPU_ARM_V4_FLAG (1<<OPUS_ARCH_ARM_V4)
#define OPUS_CPU_ARM_EDSP_FLAG (1<<OPUS_ARCH_ARM_EDSP)
#define OPUS_CPU_ARM_MEDIA_FLAG (1<<OPUS_ARCH_ARM_MEDIA)
#define OPUS_CPU_ARM_NEON_FLAG (1<<OPUS_ARCH_ARM_NEON)
#if defined(_MSC_VER)
/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
@ -55,29 +56,31 @@ static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){
/* MSVC has no OPUS_INLINE __asm support for ARM, but it does let you __emit
* instructions via their assembled hex code.
* All of these instructions should be essentially nops. */
# if defined(OPUS_ARM_MAY_HAVE_EDSP)
# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_MEDIA) \
|| defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
__try{
/*PLD [r13]*/
__emit(0xF5DDF000);
flags|=OPUS_CPU_ARM_EDSP;
flags|=OPUS_CPU_ARM_EDSP_FLAG;
}
__except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
/*Ignore exception.*/
}
# if defined(OPUS_ARM_MAY_HAVE_MEDIA)
# if defined(OPUS_ARM_MAY_HAVE_MEDIA) \
|| defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
__try{
/*SHADD8 r3,r3,r3*/
__emit(0xE6333F93);
flags|=OPUS_CPU_ARM_MEDIA;
flags|=OPUS_CPU_ARM_MEDIA_FLAG;
}
__except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
/*Ignore exception.*/
}
# if defined(OPUS_ARM_MAY_HAVE_NEON)
# if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
__try{
/*VORR q0,q0,q0*/
__emit(0xF2200150);
flags|=OPUS_CPU_ARM_NEON;
flags|=OPUS_CPU_ARM_NEON_FLAG;
}
__except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
/*Ignore exception.*/
@ -107,26 +110,26 @@ opus_uint32 opus_cpu_capabilities(void)
while(fgets(buf, 512, cpuinfo) != NULL)
{
# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_NEON)
# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_MEDIA) \
|| defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
/* Search for edsp and neon flag */
if(memcmp(buf, "Features", 8) == 0)
{
char *p;
# if defined(OPUS_ARM_MAY_HAVE_EDSP)
p = strstr(buf, " edsp");
if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
flags |= OPUS_CPU_ARM_EDSP;
# endif
flags |= OPUS_CPU_ARM_EDSP_FLAG;
# if defined(OPUS_ARM_MAY_HAVE_NEON)
# if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
p = strstr(buf, " neon");
if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
flags |= OPUS_CPU_ARM_NEON;
flags |= OPUS_CPU_ARM_NEON_FLAG;
# endif
}
# endif
# if defined(OPUS_ARM_MAY_HAVE_MEDIA)
# if defined(OPUS_ARM_MAY_HAVE_MEDIA) \
|| defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
/* Search for media capabilities (>= ARMv6) */
if(memcmp(buf, "CPU architecture:", 17) == 0)
{
@ -134,7 +137,7 @@ opus_uint32 opus_cpu_capabilities(void)
version = atoi(buf+17);
if(version >= 6)
flags |= OPUS_CPU_ARM_MEDIA;
flags |= OPUS_CPU_ARM_MEDIA_FLAG;
}
# endif
}
@ -156,18 +159,26 @@ int opus_select_arch(void)
opus_uint32 flags = opus_cpu_capabilities();
int arch = 0;
if(!(flags & OPUS_CPU_ARM_EDSP))
if(!(flags & OPUS_CPU_ARM_EDSP_FLAG)) {
/* Asserts ensure arch values are sequential */
celt_assert(arch == OPUS_ARCH_ARM_V4);
return arch;
}
arch++;
if(!(flags & OPUS_CPU_ARM_MEDIA))
if(!(flags & OPUS_CPU_ARM_MEDIA_FLAG)) {
celt_assert(arch == OPUS_ARCH_ARM_EDSP);
return arch;
}
arch++;
if(!(flags & OPUS_CPU_ARM_NEON))
if(!(flags & OPUS_CPU_ARM_NEON_FLAG)) {
celt_assert(arch == OPUS_ARCH_ARM_MEDIA);
return arch;
}
arch++;
celt_assert(arch == OPUS_ARCH_ARM_NEON);
return arch;
}

View File

@ -66,6 +66,12 @@
# if defined(OPUS_HAVE_RTCD)
int opus_select_arch(void);
#define OPUS_ARCH_ARM_V4 (0)
#define OPUS_ARCH_ARM_EDSP (1)
#define OPUS_ARCH_ARM_MEDIA (2)
#define OPUS_ARCH_ARM_NEON (3)
# endif
#endif

View File

@ -0,0 +1,174 @@
/* Copyright (c) 2015 Xiph.Org Foundation
Written by Viswanath Puttagunta */
/**
@file celt_ne10_fft.c
@brief ARM Neon optimizations for fft using NE10 library
*/
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef SKIP_CONFIG_H
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#endif
#include <NE10_init.h>
#include <NE10_dsp.h>
#include "os_support.h"
#include "kiss_fft.h"
#include "stack_alloc.h"
#if !defined(FIXED_POINT)
# define NE10_FFT_ALLOC_C2C_TYPE_NEON ne10_fft_alloc_c2c_float32_neon
# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_float32_t
# define NE10_FFT_STATE_TYPE_T ne10_fft_state_float32_t
# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_float32
# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_float32_t
# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_float32_neon
#else
# define NE10_FFT_ALLOC_C2C_TYPE_NEON(nfft) ne10_fft_alloc_c2c_int32_neon(nfft)
# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_int32_t
# define NE10_FFT_STATE_TYPE_T ne10_fft_state_int32_t
# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32
# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32
# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_int32_t
# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_int32_neon
#endif
#if defined(CUSTOM_MODES)
/* nfft lengths in NE10 that support scaled fft */
# define NE10_FFTSCALED_SUPPORT_MAX 4
static const int ne10_fft_scaled_support[NE10_FFTSCALED_SUPPORT_MAX] = {
480, 240, 120, 60
};
int opus_fft_alloc_arm_neon(kiss_fft_state *st)
{
int i;
size_t memneeded = sizeof(struct arch_fft_state);
st->arch_fft = (arch_fft_state *)opus_alloc(memneeded);
if (!st->arch_fft)
return -1;
for (i = 0; i < NE10_FFTSCALED_SUPPORT_MAX; i++) {
if(st->nfft == ne10_fft_scaled_support[i])
break;
}
if (i == NE10_FFTSCALED_SUPPORT_MAX) {
/* This nfft length (scaled fft) is not supported in NE10 */
st->arch_fft->is_supported = 0;
st->arch_fft->priv = NULL;
}
else {
st->arch_fft->is_supported = 1;
st->arch_fft->priv = (void *)NE10_FFT_ALLOC_C2C_TYPE_NEON(st->nfft);
if (st->arch_fft->priv == NULL) {
return -1;
}
}
return 0;
}
void opus_fft_free_arm_neon(kiss_fft_state *st)
{
NE10_FFT_CFG_TYPE_T cfg;
if (!st->arch_fft)
return;
cfg = (NE10_FFT_CFG_TYPE_T)st->arch_fft->priv;
if (cfg)
NE10_FFT_DESTROY_C2C_TYPE(cfg);
opus_free(st->arch_fft);
}
#endif
void opus_fft_neon(const kiss_fft_state *st,
const kiss_fft_cpx *fin,
kiss_fft_cpx *fout)
{
NE10_FFT_STATE_TYPE_T state;
NE10_FFT_CFG_TYPE_T cfg = &state;
VARDECL(NE10_FFT_CPX_TYPE_T, buffer);
SAVE_STACK;
ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T);
if (!st->arch_fft->is_supported) {
/* This nfft length (scaled fft) not supported in NE10 */
opus_fft_c(st, fin, fout);
}
else {
memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T));
state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0];
#if !defined(FIXED_POINT)
state.is_forward_scaled = 1;
NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
(NE10_FFT_CPX_TYPE_T *)fin,
cfg, 0);
#else
NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
(NE10_FFT_CPX_TYPE_T *)fin,
cfg, 0, 1);
#endif
}
RESTORE_STACK;
}
void opus_ifft_neon(const kiss_fft_state *st,
const kiss_fft_cpx *fin,
kiss_fft_cpx *fout)
{
NE10_FFT_STATE_TYPE_T state;
NE10_FFT_CFG_TYPE_T cfg = &state;
VARDECL(NE10_FFT_CPX_TYPE_T, buffer);
SAVE_STACK;
ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T);
if (!st->arch_fft->is_supported) {
/* This nfft length (scaled fft) not supported in NE10 */
opus_ifft_c(st, fin, fout);
}
else {
memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T));
state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0];
#if !defined(FIXED_POINT)
state.is_backward_scaled = 0;
NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
(NE10_FFT_CPX_TYPE_T *)fin,
cfg, 1);
#else
NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
(NE10_FFT_CPX_TYPE_T *)fin,
cfg, 1, 0);
#endif
}
RESTORE_STACK;
}

View File

@ -0,0 +1,258 @@
/* Copyright (c) 2015 Xiph.Org Foundation
Written by Viswanath Puttagunta */
/**
@file celt_ne10_mdct.c
@brief ARM Neon optimizations for mdct using NE10 library
*/
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef SKIP_CONFIG_H
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#endif
#include "kiss_fft.h"
#include "_kiss_fft_guts.h"
#include "mdct.h"
#include "stack_alloc.h"
void clt_mdct_forward_neon(const mdct_lookup *l,
kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window,
int overlap, int shift, int stride, int arch)
{
int i;
int N, N2, N4;
VARDECL(kiss_fft_scalar, f);
VARDECL(kiss_fft_cpx, f2);
const kiss_fft_state *st = l->kfft[shift];
const kiss_twiddle_scalar *trig;
SAVE_STACK;
N = l->n;
trig = l->trig;
for (i=0;i<shift;i++)
{
N >>= 1;
trig += N;
}
N2 = N>>1;
N4 = N>>2;
ALLOC(f, N2, kiss_fft_scalar);
ALLOC(f2, N4, kiss_fft_cpx);
/* Consider the input to be composed of four blocks: [a, b, c, d] */
/* Window, shuffle, fold */
{
/* Temp pointers to make it really clear to the compiler what we're doing */
const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
kiss_fft_scalar * OPUS_RESTRICT yp = f;
const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
for(i=0;i<((overlap+3)>>2);i++)
{
/* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
*yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
*yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]);
xp1+=2;
xp2-=2;
wp1+=2;
wp2-=2;
}
wp1 = window;
wp2 = window+overlap-1;
for(;i<N4-((overlap+3)>>2);i++)
{
/* Real part arranged as a-bR, Imag part arranged as -c-dR */
*yp++ = *xp2;
*yp++ = *xp1;
xp1+=2;
xp2-=2;
}
for(;i<N4;i++)
{
/* Real part arranged as a-bR, Imag part arranged as -c-dR */
*yp++ = -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
*yp++ = MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]);
xp1+=2;
xp2-=2;
wp1+=2;
wp2-=2;
}
}
/* Pre-rotation */
{
kiss_fft_scalar * OPUS_RESTRICT yp = f;
const kiss_twiddle_scalar *t = &trig[0];
for(i=0;i<N4;i++)
{
kiss_fft_cpx yc;
kiss_twiddle_scalar t0, t1;
kiss_fft_scalar re, im, yr, yi;
t0 = t[i];
t1 = t[N4+i];
re = *yp++;
im = *yp++;
yr = S_MUL(re,t0) - S_MUL(im,t1);
yi = S_MUL(im,t0) + S_MUL(re,t1);
yc.r = yr;
yc.i = yi;
f2[i] = yc;
}
}
opus_fft(st, f2, (kiss_fft_cpx *)f, arch);
/* Post-rotate */
{
/* Temp pointers to make it really clear to the compiler what we're doing */
const kiss_fft_cpx * OPUS_RESTRICT fp = (kiss_fft_cpx *)f;
kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
const kiss_twiddle_scalar *t = &trig[0];
/* Temp pointers to make it really clear to the compiler what we're doing */
for(i=0;i<N4;i++)
{
kiss_fft_scalar yr, yi;
yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]);
yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]);
*yp1 = yr;
*yp2 = yi;
fp++;
yp1 += 2*stride;
yp2 -= 2*stride;
}
}
RESTORE_STACK;
}
void clt_mdct_backward_neon(const mdct_lookup *l,
kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 * OPUS_RESTRICT window,
int overlap, int shift, int stride, int arch)
{
int i;
int N, N2, N4;
VARDECL(kiss_fft_scalar, f);
const kiss_twiddle_scalar *trig;
const kiss_fft_state *st = l->kfft[shift];
N = l->n;
trig = l->trig;
for (i=0;i<shift;i++)
{
N >>= 1;
trig += N;
}
N2 = N>>1;
N4 = N>>2;
ALLOC(f, N2, kiss_fft_scalar);
/* Pre-rotate */
{
/* Temp pointers to make it really clear to the compiler what we're doing */
const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
kiss_fft_scalar * OPUS_RESTRICT yp = f;
const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];
for(i=0;i<N4;i++)
{
kiss_fft_scalar yr, yi;
yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]);
yi = S_MUL(*xp1, t[i]) - S_MUL(*xp2, t[N4+i]);
yp[2*i] = yr;
yp[2*i+1] = yi;
xp1+=2*stride;
xp2-=2*stride;
}
}
opus_ifft(st, (kiss_fft_cpx *)f, (kiss_fft_cpx*)(out+(overlap>>1)), arch);
/* Post-rotate and de-shuffle from both ends of the buffer at once to make
it in-place. */
{
kiss_fft_scalar * yp0 = out+(overlap>>1);
kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2;
const kiss_twiddle_scalar *t = &trig[0];
/* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
middle pair will be computed twice. */
for(i=0;i<(N4+1)>>1;i++)
{
kiss_fft_scalar re, im, yr, yi;
kiss_twiddle_scalar t0, t1;
re = yp0[0];
im = yp0[1];
t0 = t[i];
t1 = t[N4+i];
/* We'd scale up by 2 here, but instead it's done when mixing the windows */
yr = S_MUL(re,t0) + S_MUL(im,t1);
yi = S_MUL(re,t1) - S_MUL(im,t0);
re = yp1[0];
im = yp1[1];
yp0[0] = yr;
yp1[1] = yi;
t0 = t[(N4-i-1)];
t1 = t[(N2-i-1)];
/* We'd scale up by 2 here, but instead it's done when mixing the windows */
yr = S_MUL(re,t0) + S_MUL(im,t1);
yi = S_MUL(re,t1) - S_MUL(im,t0);
yp1[0] = yr;
yp0[1] = yi;
yp0 += 2;
yp1 -= 2;
}
}
/* Mirror on both sides for TDAC */
{
kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
const opus_val16 * OPUS_RESTRICT wp1 = window;
const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
for(i = 0; i < overlap/2; i++)
{
kiss_fft_scalar x1, x2;
x1 = *xp1;
x2 = *yp1;
*yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
*xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
wp1++;
wp2--;
}
}
RESTORE_STACK;
}

View File

@ -0,0 +1,311 @@
/* Copyright (c) 2014-2015 Xiph.Org Foundation
Written by Viswanath Puttagunta */
/**
@file celt_neon_intr.c
@brief ARM Neon Intrinsic optimizations for celt
*/
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <arm_neon.h>
#include "../pitch.h"
#if defined(FIXED_POINT)
void xcorr_kernel_neon_fixed(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
{
int j;
int32x4_t a = vld1q_s32(sum);
/* Load y[0...3] */
/* This requires len>0 to always be valid (which we assert in the C code). */
int16x4_t y0 = vld1_s16(y);
y += 4;
for (j = 0; j + 8 <= len; j += 8)
{
/* Load x[0...7] */
int16x8_t xx = vld1q_s16(x);
int16x4_t x0 = vget_low_s16(xx);
int16x4_t x4 = vget_high_s16(xx);
/* Load y[4...11] */
int16x8_t yy = vld1q_s16(y);
int16x4_t y4 = vget_low_s16(yy);
int16x4_t y8 = vget_high_s16(yy);
int32x4_t a0 = vmlal_lane_s16(a, y0, x0, 0);
int32x4_t a1 = vmlal_lane_s16(a0, y4, x4, 0);
int16x4_t y1 = vext_s16(y0, y4, 1);
int16x4_t y5 = vext_s16(y4, y8, 1);
int32x4_t a2 = vmlal_lane_s16(a1, y1, x0, 1);
int32x4_t a3 = vmlal_lane_s16(a2, y5, x4, 1);
int16x4_t y2 = vext_s16(y0, y4, 2);
int16x4_t y6 = vext_s16(y4, y8, 2);
int32x4_t a4 = vmlal_lane_s16(a3, y2, x0, 2);
int32x4_t a5 = vmlal_lane_s16(a4, y6, x4, 2);
int16x4_t y3 = vext_s16(y0, y4, 3);
int16x4_t y7 = vext_s16(y4, y8, 3);
int32x4_t a6 = vmlal_lane_s16(a5, y3, x0, 3);
int32x4_t a7 = vmlal_lane_s16(a6, y7, x4, 3);
y0 = y8;
a = a7;
x += 8;
y += 8;
}
for (; j < len; j++)
{
int16x4_t x0 = vld1_dup_s16(x); /* load next x */
int32x4_t a0 = vmlal_s16(a, y0, x0);
int16x4_t y4 = vld1_dup_s16(y); /* load next y */
y0 = vext_s16(y0, y4, 1);
a = a0;
x++;
y++;
}
vst1q_s32(sum, a);
}
#else
/*
* Function: xcorr_kernel_neon_float
* ---------------------------------
* Computes 4 correlation values and stores them in sum[4]
*/
static void xcorr_kernel_neon_float(const float32_t *x, const float32_t *y,
float32_t sum[4], int len) {
float32x4_t YY[3];
float32x4_t YEXT[3];
float32x4_t XX[2];
float32x2_t XX_2;
float32x4_t SUMM;
const float32_t *xi = x;
const float32_t *yi = y;
celt_assert(len>0);
YY[0] = vld1q_f32(yi);
SUMM = vdupq_n_f32(0);
/* Consume 8 elements in x vector and 12 elements in y
* vector. However, the 12'th element never really gets
* touched in this loop. So, if len == 8, then we only
* must access y[0] to y[10]. y[11] must not be accessed
* hence make sure len > 8 and not len >= 8
*/
while (len > 8) {
yi += 4;
YY[1] = vld1q_f32(yi);
yi += 4;
YY[2] = vld1q_f32(yi);
XX[0] = vld1q_f32(xi);
xi += 4;
XX[1] = vld1q_f32(xi);
xi += 4;
SUMM = vmlaq_lane_f32(SUMM, YY[0], vget_low_f32(XX[0]), 0);
YEXT[0] = vextq_f32(YY[0], YY[1], 1);
SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[0]), 1);
YEXT[1] = vextq_f32(YY[0], YY[1], 2);
SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[0]), 0);
YEXT[2] = vextq_f32(YY[0], YY[1], 3);
SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[0]), 1);
SUMM = vmlaq_lane_f32(SUMM, YY[1], vget_low_f32(XX[1]), 0);
YEXT[0] = vextq_f32(YY[1], YY[2], 1);
SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[1]), 1);
YEXT[1] = vextq_f32(YY[1], YY[2], 2);
SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[1]), 0);
YEXT[2] = vextq_f32(YY[1], YY[2], 3);
SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[1]), 1);
YY[0] = YY[2];
len -= 8;
}
/* Consume 4 elements in x vector and 8 elements in y
* vector. However, the 8'th element in y never really gets
* touched in this loop. So, if len == 4, then we only
* must access y[0] to y[6]. y[7] must not be accessed
* hence make sure len>4 and not len>=4
*/
if (len > 4) {
yi += 4;
YY[1] = vld1q_f32(yi);
XX[0] = vld1q_f32(xi);
xi += 4;
SUMM = vmlaq_lane_f32(SUMM, YY[0], vget_low_f32(XX[0]), 0);
YEXT[0] = vextq_f32(YY[0], YY[1], 1);
SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[0]), 1);
YEXT[1] = vextq_f32(YY[0], YY[1], 2);
SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[0]), 0);
YEXT[2] = vextq_f32(YY[0], YY[1], 3);
SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[0]), 1);
YY[0] = YY[1];
len -= 4;
}
while (--len > 0) {
XX_2 = vld1_dup_f32(xi++);
SUMM = vmlaq_lane_f32(SUMM, YY[0], XX_2, 0);
YY[0]= vld1q_f32(++yi);
}
XX_2 = vld1_dup_f32(xi);
SUMM = vmlaq_lane_f32(SUMM, YY[0], XX_2, 0);
vst1q_f32(sum, SUMM);
}
/*
* Function: xcorr_kernel_neon_float_process1
* ---------------------------------
* Computes single correlation values and stores in *sum
*/
static void xcorr_kernel_neon_float_process1(const float32_t *x,
const float32_t *y, float32_t *sum, int len) {
float32x4_t XX[4];
float32x4_t YY[4];
float32x2_t XX_2;
float32x2_t YY_2;
float32x4_t SUMM;
float32x2_t SUMM_2[2];
const float32_t *xi = x;
const float32_t *yi = y;
SUMM = vdupq_n_f32(0);
/* Work on 16 values per iteration */
while (len >= 16) {
XX[0] = vld1q_f32(xi);
xi += 4;
XX[1] = vld1q_f32(xi);
xi += 4;
XX[2] = vld1q_f32(xi);
xi += 4;
XX[3] = vld1q_f32(xi);
xi += 4;
YY[0] = vld1q_f32(yi);
yi += 4;
YY[1] = vld1q_f32(yi);
yi += 4;
YY[2] = vld1q_f32(yi);
yi += 4;
YY[3] = vld1q_f32(yi);
yi += 4;
SUMM = vmlaq_f32(SUMM, YY[0], XX[0]);
SUMM = vmlaq_f32(SUMM, YY[1], XX[1]);
SUMM = vmlaq_f32(SUMM, YY[2], XX[2]);
SUMM = vmlaq_f32(SUMM, YY[3], XX[3]);
len -= 16;
}
/* Work on 8 values */
if (len >= 8) {
XX[0] = vld1q_f32(xi);
xi += 4;
XX[1] = vld1q_f32(xi);
xi += 4;
YY[0] = vld1q_f32(yi);
yi += 4;
YY[1] = vld1q_f32(yi);
yi += 4;
SUMM = vmlaq_f32(SUMM, YY[0], XX[0]);
SUMM = vmlaq_f32(SUMM, YY[1], XX[1]);
len -= 8;
}
/* Work on 4 values */
if (len >= 4) {
XX[0] = vld1q_f32(xi);
xi += 4;
YY[0] = vld1q_f32(yi);
yi += 4;
SUMM = vmlaq_f32(SUMM, YY[0], XX[0]);
len -= 4;
}
/* Start accumulating results */
SUMM_2[0] = vget_low_f32(SUMM);
if (len >= 2) {
/* While at it, consume 2 more values if available */
XX_2 = vld1_f32(xi);
xi += 2;
YY_2 = vld1_f32(yi);
yi += 2;
SUMM_2[0] = vmla_f32(SUMM_2[0], YY_2, XX_2);
len -= 2;
}
SUMM_2[1] = vget_high_f32(SUMM);
SUMM_2[0] = vadd_f32(SUMM_2[0], SUMM_2[1]);
SUMM_2[0] = vpadd_f32(SUMM_2[0], SUMM_2[0]);
/* Ok, now we have result accumulated in SUMM_2[0].0 */
if (len > 0) {
/* Case when you have one value left */
XX_2 = vld1_dup_f32(xi);
YY_2 = vld1_dup_f32(yi);
SUMM_2[0] = vmla_f32(SUMM_2[0], XX_2, YY_2);
}
vst1_lane_f32(sum, SUMM_2[0], 0);
}
void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch) {
int i;
celt_assert(max_pitch > 0);
celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
for (i = 0; i < (max_pitch-3); i += 4) {
xcorr_kernel_neon_float((const float32_t *)_x, (const float32_t *)_y+i,
(float32_t *)xcorr+i, len);
}
/* In case max_pitch isn't multiple of 4
* compute single correlation value per iteration
*/
for (; i < max_pitch; i++) {
xcorr_kernel_neon_float_process1((const float32_t *)_x,
(const float32_t *)_y+i, (float32_t *)xcorr+i, len);
}
}
#endif

View File

@ -0,0 +1,551 @@
.syntax unified
@ Copyright (c) 2007-2008 CSIRO
@ Copyright (c) 2007-2009 Xiph.Org Foundation
@ Copyright (c) 2013 Parrot
@ Written by Aurélien Zanelli
@
@ Redistribution and use in source and binary forms, with or without
@ modification, are permitted provided that the following conditions
@ are met:
@
@ - Redistributions of source code must retain the above copyright
@ notice, this list of conditions and the following disclaimer.
@
@ - Redistributions in binary form must reproduce the above copyright
@ notice, this list of conditions and the following disclaimer in the
@ documentation and/or other materials provided with the distribution.
@
@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
@ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
@ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.text; .p2align 2; .arch armv7-a
.fpu neon
.object_arch armv4t
.include "celt/arm/armopts-gnu.S"
.if OPUS_ARM_MAY_HAVE_EDSP
.global celt_pitch_xcorr_edsp
.endif
.if OPUS_ARM_MAY_HAVE_NEON
.global celt_pitch_xcorr_neon
.endif
.if OPUS_ARM_MAY_HAVE_NEON
@ Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3
; xcorr_kernel_neon: @ PROC
xcorr_kernel_neon_start:
@ input:
@ r3 = int len
@ r4 = opus_val16 *x
@ r5 = opus_val16 *y
@ q0 = opus_val32 sum[4]
@ output:
@ q0 = opus_val32 sum[4]
@ preserved: r0-r3, r6-r11, d2, q4-q7, q9-q15
@ internal usage:
@ r12 = int j
@ d3 = y_3|y_2|y_1|y_0
@ q2 = y_B|y_A|y_9|y_8|y_7|y_6|y_5|y_4
@ q3 = x_7|x_6|x_5|x_4|x_3|x_2|x_1|x_0
@ q8 = scratch
@
@ Load y[0...3]
@ This requires len>0 to always be valid (which we assert in the C code).
VLD1.16 {d5}, [r5]!
SUBS r12, r3, #8
BLE xcorr_kernel_neon_process4
@ Process 8 samples at a time.
@ This loop loads one y value more than we actually need. Therefore we have to
@ stop as soon as there are 8 or fewer samples left (instead of 7), to avoid
@ reading past the end of the array.
xcorr_kernel_neon_process8:
@ This loop has 19 total instructions (10 cycles to issue, minimum), with
@ - 2 cycles of ARM insrtuctions,
@ - 10 cycles of load/store/byte permute instructions, and
@ - 9 cycles of data processing instructions.
@ On a Cortex A8, we dual-issue the maximum amount (9 cycles) between the
@ latter two categories, meaning the whole loop should run in 10 cycles per
@ iteration, barring cache misses.
@
@ Load x[0...7]
VLD1.16 {d6, d7}, [r4]!
@ Unlike VMOV, VAND is a data processsing instruction (and doesn't get
@ assembled to VMOV, like VORR would), so it dual-issues with the prior VLD1.
VAND d3, d5, d5
SUBS r12, r12, #8
@ Load y[4...11]
VLD1.16 {d4, d5}, [r5]!
VMLAL.S16 q0, d3, d6[0]
VEXT.16 d16, d3, d4, #1
VMLAL.S16 q0, d4, d7[0]
VEXT.16 d17, d4, d5, #1
VMLAL.S16 q0, d16, d6[1]
VEXT.16 d16, d3, d4, #2
VMLAL.S16 q0, d17, d7[1]
VEXT.16 d17, d4, d5, #2
VMLAL.S16 q0, d16, d6[2]
VEXT.16 d16, d3, d4, #3
VMLAL.S16 q0, d17, d7[2]
VEXT.16 d17, d4, d5, #3
VMLAL.S16 q0, d16, d6[3]
VMLAL.S16 q0, d17, d7[3]
BGT xcorr_kernel_neon_process8
@ Process 4 samples here if we have > 4 left (still reading one extra y value).
xcorr_kernel_neon_process4:
ADDS r12, r12, #4
BLE xcorr_kernel_neon_process2
@ Load x[0...3]
VLD1.16 d6, [r4]!
@ Use VAND since it's a data processing instruction again.
VAND d4, d5, d5
SUB r12, r12, #4
@ Load y[4...7]
VLD1.16 d5, [r5]!
VMLAL.S16 q0, d4, d6[0]
VEXT.16 d16, d4, d5, #1
VMLAL.S16 q0, d16, d6[1]
VEXT.16 d16, d4, d5, #2
VMLAL.S16 q0, d16, d6[2]
VEXT.16 d16, d4, d5, #3
VMLAL.S16 q0, d16, d6[3]
@ Process 2 samples here if we have > 2 left (still reading one extra y value).
xcorr_kernel_neon_process2:
ADDS r12, r12, #2
BLE xcorr_kernel_neon_process1
@ Load x[0...1]
VLD2.16 {d6[],d7[]}, [r4]!
@ Use VAND since it's a data processing instruction again.
VAND d4, d5, d5
SUB r12, r12, #2
@ Load y[4...5]
VLD1.32 {d5[]}, [r5]!
VMLAL.S16 q0, d4, d6
VEXT.16 d16, d4, d5, #1
@ Replace bottom copy of {y5,y4} in d5 with {y3,y2} from d4, using VSRI
@ instead of VEXT, since it's a data-processing instruction.
VSRI.64 d5, d4, #32
VMLAL.S16 q0, d16, d7
@ Process 1 sample using the extra y value we loaded above.
xcorr_kernel_neon_process1:
@ Load next *x
VLD1.16 {d6[]}, [r4]!
ADDS r12, r12, #1
@ y[0...3] are left in d5 from prior iteration(s) (if any)
VMLAL.S16 q0, d5, d6
MOVLE pc, lr
@ Now process 1 last sample, not reading ahead.
@ Load last *y
VLD1.16 {d4[]}, [r5]!
VSRI.64 d4, d5, #16
@ Load last *x
VLD1.16 {d6[]}, [r4]!
VMLAL.S16 q0, d4, d6
MOV pc, lr
.size xcorr_kernel_neon, .-xcorr_kernel_neon @ ENDP
@ opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y,
@ opus_val32 *xcorr, int len, int max_pitch)
; celt_pitch_xcorr_neon: @ PROC
@ input:
@ r0 = opus_val16 *_x
@ r1 = opus_val16 *_y
@ r2 = opus_val32 *xcorr
@ r3 = int len
@ output:
@ r0 = int maxcorr
@ internal usage:
@ r4 = opus_val16 *x (for xcorr_kernel_neon())
@ r5 = opus_val16 *y (for xcorr_kernel_neon())
@ r6 = int max_pitch
@ r12 = int j
@ q15 = int maxcorr[4] (q15 is not used by xcorr_kernel_neon())
STMFD sp!, {r4-r6, lr}
LDR r6, [sp, #16]
VMOV.S32 q15, #1
@ if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done
SUBS r6, r6, #4
BLT celt_pitch_xcorr_neon_process4_done
celt_pitch_xcorr_neon_process4:
@ xcorr_kernel_neon parameters:
@ r3 = len, r4 = _x, r5 = _y, q0 = {0, 0, 0, 0}
MOV r4, r0
MOV r5, r1
VEOR q0, q0, q0
@ xcorr_kernel_neon only modifies r4, r5, r12, and q0...q3.
@ So we don't save/restore any other registers.
BL xcorr_kernel_neon_start
SUBS r6, r6, #4
VST1.32 {q0}, [r2]!
@ _y += 4
ADD r1, r1, #8
VMAX.S32 q15, q15, q0
@ if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done
BGE celt_pitch_xcorr_neon_process4
@ We have less than 4 sums left to compute.
celt_pitch_xcorr_neon_process4_done:
ADDS r6, r6, #4
@ Reduce maxcorr to a single value
VMAX.S32 d30, d30, d31
VPMAX.S32 d30, d30, d30
@ if (max_pitch <= 0) goto celt_pitch_xcorr_neon_done
BLE celt_pitch_xcorr_neon_done
@ Now compute each remaining sum one at a time.
celt_pitch_xcorr_neon_process_remaining:
MOV r4, r0
MOV r5, r1
VMOV.I32 q0, #0
SUBS r12, r3, #8
BLT celt_pitch_xcorr_neon_process_remaining4
@ Sum terms 8 at a time.
celt_pitch_xcorr_neon_process_remaining_loop8:
@ Load x[0...7]
VLD1.16 {q1}, [r4]!
@ Load y[0...7]
VLD1.16 {q2}, [r5]!
SUBS r12, r12, #8
VMLAL.S16 q0, d4, d2
VMLAL.S16 q0, d5, d3
BGE celt_pitch_xcorr_neon_process_remaining_loop8
@ Sum terms 4 at a time.
celt_pitch_xcorr_neon_process_remaining4:
ADDS r12, r12, #4
BLT celt_pitch_xcorr_neon_process_remaining4_done
@ Load x[0...3]
VLD1.16 {d2}, [r4]!
@ Load y[0...3]
VLD1.16 {d3}, [r5]!
SUB r12, r12, #4
VMLAL.S16 q0, d3, d2
celt_pitch_xcorr_neon_process_remaining4_done:
@ Reduce the sum to a single value.
VADD.S32 d0, d0, d1
VPADDL.S32 d0, d0
ADDS r12, r12, #4
BLE celt_pitch_xcorr_neon_process_remaining_loop_done
@ Sum terms 1 at a time.
celt_pitch_xcorr_neon_process_remaining_loop1:
VLD1.16 {d2[]}, [r4]!
VLD1.16 {d3[]}, [r5]!
SUBS r12, r12, #1
VMLAL.S16 q0, d2, d3
BGT celt_pitch_xcorr_neon_process_remaining_loop1
celt_pitch_xcorr_neon_process_remaining_loop_done:
VST1.32 {d0[0]}, [r2]!
VMAX.S32 d30, d30, d0
SUBS r6, r6, #1
@ _y++
ADD r1, r1, #2
@ if (--max_pitch > 0) goto celt_pitch_xcorr_neon_process_remaining
BGT celt_pitch_xcorr_neon_process_remaining
celt_pitch_xcorr_neon_done:
VMOV.32 r0, d30[0]
LDMFD sp!, {r4-r6, pc}
.size celt_pitch_xcorr_neon, .-celt_pitch_xcorr_neon @ ENDP
.endif
.if OPUS_ARM_MAY_HAVE_EDSP
@ This will get used on ARMv7 devices without NEON, so it has been optimized
@ to take advantage of dual-issuing where possible.
; xcorr_kernel_edsp: @ PROC
xcorr_kernel_edsp_start:
@ input:
@ r3 = int len
@ r4 = opus_val16 *_x (must be 32-bit aligned)
@ r5 = opus_val16 *_y (must be 32-bit aligned)
@ r6...r9 = opus_val32 sum[4]
@ output:
@ r6...r9 = opus_val32 sum[4]
@ preserved: r0-r5
@ internal usage
@ r2 = int j
@ r12,r14 = opus_val16 x[4]
@ r10,r11 = opus_val16 y[4]
STMFD sp!, {r2,r4,r5,lr}
LDR r10, [r5], #4 @ Load y[0...1]
SUBS r2, r3, #4 @ j = len-4
LDR r11, [r5], #4 @ Load y[2...3]
BLE xcorr_kernel_edsp_process4_done
LDR r12, [r4], #4 @ Load x[0...1]
@ Stall
xcorr_kernel_edsp_process4:
@ The multiplies must issue from pipeline 0, and can't dual-issue with each
@ other. Every other instruction here dual-issues with a multiply, and is
@ thus "free". There should be no stalls in the body of the loop.
SMLABB r6, r12, r10, r6 @ sum[0] = MAC16_16(sum[0],x_0,y_0)
LDR r14, [r4], #4 @ Load x[2...3]
SMLABT r7, r12, r10, r7 @ sum[1] = MAC16_16(sum[1],x_0,y_1)
SUBS r2, r2, #4 @ j-=4
SMLABB r8, r12, r11, r8 @ sum[2] = MAC16_16(sum[2],x_0,y_2)
SMLABT r9, r12, r11, r9 @ sum[3] = MAC16_16(sum[3],x_0,y_3)
SMLATT r6, r12, r10, r6 @ sum[0] = MAC16_16(sum[0],x_1,y_1)
LDR r10, [r5], #4 @ Load y[4...5]
SMLATB r7, r12, r11, r7 @ sum[1] = MAC16_16(sum[1],x_1,y_2)
SMLATT r8, r12, r11, r8 @ sum[2] = MAC16_16(sum[2],x_1,y_3)
SMLATB r9, r12, r10, r9 @ sum[3] = MAC16_16(sum[3],x_1,y_4)
LDRGT r12, [r4], #4 @ Load x[0...1]
SMLABB r6, r14, r11, r6 @ sum[0] = MAC16_16(sum[0],x_2,y_2)
SMLABT r7, r14, r11, r7 @ sum[1] = MAC16_16(sum[1],x_2,y_3)
SMLABB r8, r14, r10, r8 @ sum[2] = MAC16_16(sum[2],x_2,y_4)
SMLABT r9, r14, r10, r9 @ sum[3] = MAC16_16(sum[3],x_2,y_5)
SMLATT r6, r14, r11, r6 @ sum[0] = MAC16_16(sum[0],x_3,y_3)
LDR r11, [r5], #4 @ Load y[6...7]
SMLATB r7, r14, r10, r7 @ sum[1] = MAC16_16(sum[1],x_3,y_4)
SMLATT r8, r14, r10, r8 @ sum[2] = MAC16_16(sum[2],x_3,y_5)
SMLATB r9, r14, r11, r9 @ sum[3] = MAC16_16(sum[3],x_3,y_6)
BGT xcorr_kernel_edsp_process4
xcorr_kernel_edsp_process4_done:
ADDS r2, r2, #4
BLE xcorr_kernel_edsp_done
LDRH r12, [r4], #2 @ r12 = *x++
SUBS r2, r2, #1 @ j--
@ Stall
SMLABB r6, r12, r10, r6 @ sum[0] = MAC16_16(sum[0],x,y_0)
LDRHGT r14, [r4], #2 @ r14 = *x++
SMLABT r7, r12, r10, r7 @ sum[1] = MAC16_16(sum[1],x,y_1)
SMLABB r8, r12, r11, r8 @ sum[2] = MAC16_16(sum[2],x,y_2)
SMLABT r9, r12, r11, r9 @ sum[3] = MAC16_16(sum[3],x,y_3)
BLE xcorr_kernel_edsp_done
SMLABT r6, r14, r10, r6 @ sum[0] = MAC16_16(sum[0],x,y_1)
SUBS r2, r2, #1 @ j--
SMLABB r7, r14, r11, r7 @ sum[1] = MAC16_16(sum[1],x,y_2)
LDRH r10, [r5], #2 @ r10 = y_4 = *y++
SMLABT r8, r14, r11, r8 @ sum[2] = MAC16_16(sum[2],x,y_3)
LDRHGT r12, [r4], #2 @ r12 = *x++
SMLABB r9, r14, r10, r9 @ sum[3] = MAC16_16(sum[3],x,y_4)
BLE xcorr_kernel_edsp_done
SMLABB r6, r12, r11, r6 @ sum[0] = MAC16_16(sum[0],tmp,y_2)
CMP r2, #1 @ j--
SMLABT r7, r12, r11, r7 @ sum[1] = MAC16_16(sum[1],tmp,y_3)
LDRH r2, [r5], #2 @ r2 = y_5 = *y++
SMLABB r8, r12, r10, r8 @ sum[2] = MAC16_16(sum[2],tmp,y_4)
LDRHGT r14, [r4] @ r14 = *x
SMLABB r9, r12, r2, r9 @ sum[3] = MAC16_16(sum[3],tmp,y_5)
BLE xcorr_kernel_edsp_done
SMLABT r6, r14, r11, r6 @ sum[0] = MAC16_16(sum[0],tmp,y_3)
LDRH r11, [r5] @ r11 = y_6 = *y
SMLABB r7, r14, r10, r7 @ sum[1] = MAC16_16(sum[1],tmp,y_4)
SMLABB r8, r14, r2, r8 @ sum[2] = MAC16_16(sum[2],tmp,y_5)
SMLABB r9, r14, r11, r9 @ sum[3] = MAC16_16(sum[3],tmp,y_6)
xcorr_kernel_edsp_done:
LDMFD sp!, {r2,r4,r5,pc}
.size xcorr_kernel_edsp, .-xcorr_kernel_edsp @ ENDP
; celt_pitch_xcorr_edsp: @ PROC
@ input:
@ r0 = opus_val16 *_x (must be 32-bit aligned)
@ r1 = opus_val16 *_y (only needs to be 16-bit aligned)
@ r2 = opus_val32 *xcorr
@ r3 = int len
@ output:
@ r0 = maxcorr
@ internal usage
@ r4 = opus_val16 *x
@ r5 = opus_val16 *y
@ r6 = opus_val32 sum0
@ r7 = opus_val32 sum1
@ r8 = opus_val32 sum2
@ r9 = opus_val32 sum3
@ r1 = int max_pitch
@ r12 = int j
STMFD sp!, {r4-r11, lr}
MOV r5, r1
LDR r1, [sp, #36]
MOV r4, r0
TST r5, #3
@ maxcorr = 1
MOV r0, #1
BEQ celt_pitch_xcorr_edsp_process1u_done
@ Compute one sum at the start to make y 32-bit aligned.
SUBS r12, r3, #4
@ r14 = sum = 0
MOV r14, #0
LDRH r8, [r5], #2
BLE celt_pitch_xcorr_edsp_process1u_loop4_done
LDR r6, [r4], #4
MOV r8, r8, LSL #16
celt_pitch_xcorr_edsp_process1u_loop4:
LDR r9, [r5], #4
SMLABT r14, r6, r8, r14 @ sum = MAC16_16(sum, x_0, y_0)
LDR r7, [r4], #4
SMLATB r14, r6, r9, r14 @ sum = MAC16_16(sum, x_1, y_1)
LDR r8, [r5], #4
SMLABT r14, r7, r9, r14 @ sum = MAC16_16(sum, x_2, y_2)
SUBS r12, r12, #4 @ j-=4
SMLATB r14, r7, r8, r14 @ sum = MAC16_16(sum, x_3, y_3)
LDRGT r6, [r4], #4
BGT celt_pitch_xcorr_edsp_process1u_loop4
MOV r8, r8, LSR #16
celt_pitch_xcorr_edsp_process1u_loop4_done:
ADDS r12, r12, #4
celt_pitch_xcorr_edsp_process1u_loop1:
LDRHGE r6, [r4], #2
@ Stall
SMLABBGE r14, r6, r8, r14 @ sum = MAC16_16(sum, *x, *y)
SUBSGE r12, r12, #1
LDRHGT r8, [r5], #2
BGT celt_pitch_xcorr_edsp_process1u_loop1
@ Restore _x
SUB r4, r4, r3, LSL #1
@ Restore and advance _y
SUB r5, r5, r3, LSL #1
@ maxcorr = max(maxcorr, sum)
CMP r0, r14
ADD r5, r5, #2
MOVLT r0, r14
SUBS r1, r1, #1
@ xcorr[i] = sum
STR r14, [r2], #4
BLE celt_pitch_xcorr_edsp_done
celt_pitch_xcorr_edsp_process1u_done:
@ if (max_pitch < 4) goto celt_pitch_xcorr_edsp_process2
SUBS r1, r1, #4
BLT celt_pitch_xcorr_edsp_process2
celt_pitch_xcorr_edsp_process4:
@ xcorr_kernel_edsp parameters:
@ r3 = len, r4 = _x, r5 = _y, r6...r9 = sum[4] = {0, 0, 0, 0}
MOV r6, #0
MOV r7, #0
MOV r8, #0
MOV r9, #0
BL xcorr_kernel_edsp_start @ xcorr_kernel_edsp(_x, _y+i, xcorr+i, len)
@ maxcorr = max(maxcorr, sum0, sum1, sum2, sum3)
CMP r0, r6
@ _y+=4
ADD r5, r5, #8
MOVLT r0, r6
CMP r0, r7
MOVLT r0, r7
CMP r0, r8
MOVLT r0, r8
CMP r0, r9
MOVLT r0, r9
STMIA r2!, {r6-r9}
SUBS r1, r1, #4
BGE celt_pitch_xcorr_edsp_process4
celt_pitch_xcorr_edsp_process2:
ADDS r1, r1, #2
BLT celt_pitch_xcorr_edsp_process1a
SUBS r12, r3, #4
@ {r10, r11} = {sum0, sum1} = {0, 0}
MOV r10, #0
MOV r11, #0
LDR r8, [r5], #4
BLE celt_pitch_xcorr_edsp_process2_loop_done
LDR r6, [r4], #4
LDR r9, [r5], #4
celt_pitch_xcorr_edsp_process2_loop4:
SMLABB r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_0, y_0)
LDR r7, [r4], #4
SMLABT r11, r6, r8, r11 @ sum1 = MAC16_16(sum1, x_0, y_1)
SUBS r12, r12, #4 @ j-=4
SMLATT r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_1, y_1)
LDR r8, [r5], #4
SMLATB r11, r6, r9, r11 @ sum1 = MAC16_16(sum1, x_1, y_2)
LDRGT r6, [r4], #4
SMLABB r10, r7, r9, r10 @ sum0 = MAC16_16(sum0, x_2, y_2)
SMLABT r11, r7, r9, r11 @ sum1 = MAC16_16(sum1, x_2, y_3)
SMLATT r10, r7, r9, r10 @ sum0 = MAC16_16(sum0, x_3, y_3)
LDRGT r9, [r5], #4
SMLATB r11, r7, r8, r11 @ sum1 = MAC16_16(sum1, x_3, y_4)
BGT celt_pitch_xcorr_edsp_process2_loop4
celt_pitch_xcorr_edsp_process2_loop_done:
ADDS r12, r12, #2
BLE celt_pitch_xcorr_edsp_process2_1
LDR r6, [r4], #4
@ Stall
SMLABB r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_0, y_0)
LDR r9, [r5], #4
SMLABT r11, r6, r8, r11 @ sum1 = MAC16_16(sum1, x_0, y_1)
SUB r12, r12, #2
SMLATT r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_1, y_1)
MOV r8, r9
SMLATB r11, r6, r9, r11 @ sum1 = MAC16_16(sum1, x_1, y_2)
celt_pitch_xcorr_edsp_process2_1:
LDRH r6, [r4], #2
ADDS r12, r12, #1
@ Stall
SMLABB r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_0, y_0)
LDRHGT r7, [r4], #2
SMLABT r11, r6, r8, r11 @ sum1 = MAC16_16(sum1, x_0, y_1)
BLE celt_pitch_xcorr_edsp_process2_done
LDRH r9, [r5], #2
SMLABT r10, r7, r8, r10 @ sum0 = MAC16_16(sum0, x_0, y_1)
SMLABB r11, r7, r9, r11 @ sum1 = MAC16_16(sum1, x_0, y_2)
celt_pitch_xcorr_edsp_process2_done:
@ Restore _x
SUB r4, r4, r3, LSL #1
@ Restore and advance _y
SUB r5, r5, r3, LSL #1
@ maxcorr = max(maxcorr, sum0)
CMP r0, r10
ADD r5, r5, #2
MOVLT r0, r10
SUB r1, r1, #2
@ maxcorr = max(maxcorr, sum1)
CMP r0, r11
@ xcorr[i] = sum
STR r10, [r2], #4
MOVLT r0, r11
STR r11, [r2], #4
celt_pitch_xcorr_edsp_process1a:
ADDS r1, r1, #1
BLT celt_pitch_xcorr_edsp_done
SUBS r12, r3, #4
@ r14 = sum = 0
MOV r14, #0
BLT celt_pitch_xcorr_edsp_process1a_loop_done
LDR r6, [r4], #4
LDR r8, [r5], #4
LDR r7, [r4], #4
LDR r9, [r5], #4
celt_pitch_xcorr_edsp_process1a_loop4:
SMLABB r14, r6, r8, r14 @ sum = MAC16_16(sum, x_0, y_0)
SUBS r12, r12, #4 @ j-=4
SMLATT r14, r6, r8, r14 @ sum = MAC16_16(sum, x_1, y_1)
LDRGE r6, [r4], #4
SMLABB r14, r7, r9, r14 @ sum = MAC16_16(sum, x_2, y_2)
LDRGE r8, [r5], #4
SMLATT r14, r7, r9, r14 @ sum = MAC16_16(sum, x_3, y_3)
LDRGE r7, [r4], #4
LDRGE r9, [r5], #4
BGE celt_pitch_xcorr_edsp_process1a_loop4
celt_pitch_xcorr_edsp_process1a_loop_done:
ADDS r12, r12, #2
LDRGE r6, [r4], #4
LDRGE r8, [r5], #4
@ Stall
SMLABBGE r14, r6, r8, r14 @ sum = MAC16_16(sum, x_0, y_0)
SUBGE r12, r12, #2
SMLATTGE r14, r6, r8, r14 @ sum = MAC16_16(sum, x_1, y_1)
ADDS r12, r12, #1
LDRHGE r6, [r4], #2
LDRHGE r8, [r5], #2
@ Stall
SMLABBGE r14, r6, r8, r14 @ sum = MAC16_16(sum, *x, *y)
@ maxcorr = max(maxcorr, sum)
CMP r0, r14
@ xcorr[i] = sum
STR r14, [r2], #4
MOVLT r0, r14
celt_pitch_xcorr_edsp_done:
LDMFD sp!, {r4-r11, pc}
.size celt_pitch_xcorr_edsp, .-celt_pitch_xcorr_edsp @ ENDP
.endif
@ END:
.section .note.GNU-stack,"",%progbits

View File

@ -42,6 +42,7 @@ IF OPUS_ARM_MAY_HAVE_NEON
; Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3
xcorr_kernel_neon PROC
xcorr_kernel_neon_start
; input:
; r3 = int len
; r4 = opus_val16 *x
@ -181,7 +182,7 @@ celt_pitch_xcorr_neon_process4
VEOR q0, q0, q0
; xcorr_kernel_neon only modifies r4, r5, r12, and q0...q3.
; So we don't save/restore any other registers.
BL xcorr_kernel_neon
BL xcorr_kernel_neon_start
SUBS r6, r6, #4
VST1.32 {q0}, [r2]!
; _y += 4
@ -257,6 +258,7 @@ IF OPUS_ARM_MAY_HAVE_EDSP
; This will get used on ARMv7 devices without NEON, so it has been optimized
; to take advantage of dual-issuing where possible.
xcorr_kernel_edsp PROC
xcorr_kernel_edsp_start
; input:
; r3 = int len
; r4 = opus_val16 *_x (must be 32-bit aligned)
@ -309,7 +311,7 @@ xcorr_kernel_edsp_process4_done
SUBS r2, r2, #1 ; j--
; Stall
SMLABB r6, r12, r10, r6 ; sum[0] = MAC16_16(sum[0],x,y_0)
LDRGTH r14, [r4], #2 ; r14 = *x++
LDRHGT r14, [r4], #2 ; r14 = *x++
SMLABT r7, r12, r10, r7 ; sum[1] = MAC16_16(sum[1],x,y_1)
SMLABB r8, r12, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_2)
SMLABT r9, r12, r11, r9 ; sum[3] = MAC16_16(sum[3],x,y_3)
@ -319,7 +321,7 @@ xcorr_kernel_edsp_process4_done
SMLABB r7, r14, r11, r7 ; sum[1] = MAC16_16(sum[1],x,y_2)
LDRH r10, [r5], #2 ; r10 = y_4 = *y++
SMLABT r8, r14, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_3)
LDRGTH r12, [r4], #2 ; r12 = *x++
LDRHGT r12, [r4], #2 ; r12 = *x++
SMLABB r9, r14, r10, r9 ; sum[3] = MAC16_16(sum[3],x,y_4)
BLE xcorr_kernel_edsp_done
SMLABB r6, r12, r11, r6 ; sum[0] = MAC16_16(sum[0],tmp,y_2)
@ -327,7 +329,7 @@ xcorr_kernel_edsp_process4_done
SMLABT r7, r12, r11, r7 ; sum[1] = MAC16_16(sum[1],tmp,y_3)
LDRH r2, [r5], #2 ; r2 = y_5 = *y++
SMLABB r8, r12, r10, r8 ; sum[2] = MAC16_16(sum[2],tmp,y_4)
LDRGTH r14, [r4] ; r14 = *x
LDRHGT r14, [r4] ; r14 = *x
SMLABB r9, r12, r2, r9 ; sum[3] = MAC16_16(sum[3],tmp,y_5)
BLE xcorr_kernel_edsp_done
SMLABT r6, r14, r11, r6 ; sum[0] = MAC16_16(sum[0],tmp,y_3)
@ -387,11 +389,11 @@ celt_pitch_xcorr_edsp_process1u_loop4
celt_pitch_xcorr_edsp_process1u_loop4_done
ADDS r12, r12, #4
celt_pitch_xcorr_edsp_process1u_loop1
LDRGEH r6, [r4], #2
LDRHGE r6, [r4], #2
; Stall
SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, *x, *y)
SUBGES r12, r12, #1
LDRGTH r8, [r5], #2
SUBSGE r12, r12, #1
LDRHGT r8, [r5], #2
BGT celt_pitch_xcorr_edsp_process1u_loop1
; Restore _x
SUB r4, r4, r3, LSL #1
@ -416,7 +418,7 @@ celt_pitch_xcorr_edsp_process4
MOV r7, #0
MOV r8, #0
MOV r9, #0
BL xcorr_kernel_edsp ; xcorr_kernel_edsp(_x, _y+i, xcorr+i, len)
BL xcorr_kernel_edsp_start ; xcorr_kernel_edsp(_x, _y+i, xcorr+i, len)
; maxcorr = max(maxcorr, sum0, sum1, sum2, sum3)
CMP r0, r6
; _y+=4
@ -474,7 +476,7 @@ celt_pitch_xcorr_edsp_process2_1
ADDS r12, r12, #1
; Stall
SMLABB r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_0)
LDRGTH r7, [r4], #2
LDRHGT r7, [r4], #2
SMLABT r11, r6, r8, r11 ; sum1 = MAC16_16(sum1, x_0, y_1)
BLE celt_pitch_xcorr_edsp_process2_done
LDRH r9, [r5], #2
@ -527,8 +529,8 @@ celt_pitch_xcorr_edsp_process1a_loop_done
SUBGE r12, r12, #2
SMLATTGE r14, r6, r8, r14 ; sum = MAC16_16(sum, x_1, y_1)
ADDS r12, r12, #1
LDRGEH r6, [r4], #2
LDRGEH r8, [r5], #2
LDRHGE r6, [r4], #2
LDRHGE r8, [r5], #2
; Stall
SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, *x, *y)
; maxcorr = max(maxcorr, sum)

View File

@ -0,0 +1,72 @@
/* Copyright (c) 2015 Xiph.Org Foundation
Written by Viswanath Puttagunta */
/**
@file fft_arm.h
@brief ARM Neon Intrinsic optimizations for fft using NE10 library
*/
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if !defined(FFT_ARM_H)
#define FFT_ARM_H
#include "config.h"
#include "kiss_fft.h"
#if defined(HAVE_ARM_NE10)
int opus_fft_alloc_arm_neon(kiss_fft_state *st);
void opus_fft_free_arm_neon(kiss_fft_state *st);
void opus_fft_neon(const kiss_fft_state *st,
const kiss_fft_cpx *fin,
kiss_fft_cpx *fout);
void opus_ifft_neon(const kiss_fft_state *st,
const kiss_fft_cpx *fin,
kiss_fft_cpx *fout);
#if !defined(OPUS_HAVE_RTCD)
#define OVERRIDE_OPUS_FFT (1)
#define opus_fft_alloc_arch(_st, arch) \
((void)(arch), opus_fft_alloc_arm_neon(_st))
#define opus_fft_free_arch(_st, arch) \
((void)(arch), opus_fft_free_arm_neon(_st))
#define opus_fft(_st, _fin, _fout, arch) \
((void)(arch), opus_fft_neon(_st, _fin, _fout))
#define opus_ifft(_st, _fin, _fout, arch) \
((void)(arch), opus_ifft_neon(_st, _fin, _fout))
#endif /* OPUS_HAVE_RTCD */
#endif /* HAVE_ARM_NE10 */
#endif

View File

@ -0,0 +1,35 @@
/* Copyright (C) 2015 Vidyo */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FIXED_ARM64_H
#define FIXED_ARM64_H
#include <arm_neon.h>
#undef SIG2WORD16
#define SIG2WORD16(x) (vqmovns_s32(PSHR32((x), SIG_SHIFT)))
#endif

View File

@ -68,6 +68,10 @@ static OPUS_INLINE opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b)
#undef MAC16_32_Q15
#define MAC16_32_Q15(c, a, b) ADD32(c, MULT16_32_Q15(a, b))
/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add.
Result fits in 32 bits. */
#undef MAC16_32_Q16
#define MAC16_32_Q16(c, a, b) ADD32(c, MULT16_32_Q16(a, b))
/** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */
#undef MULT32_32_Q31

View File

@ -82,6 +82,23 @@ static OPUS_INLINE opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a,
}
#define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b))
/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add.
Result fits in 32 bits. */
#undef MAC16_32_Q16
static OPUS_INLINE opus_val32 MAC16_32_Q16_armv5e(opus_val32 c, opus_val16 a,
opus_val32 b)
{
int res;
__asm__(
"#MAC16_32_Q16\n\t"
"smlawb %0, %1, %2, %3;\n"
: "=r"(res)
: "r"(b), "r"(a), "r"(c)
);
return res;
}
#define MAC16_32_Q16(c, a, b) (MAC16_32_Q16_armv5e(c, a, b))
/** 16x16 multiply-add where the result fits in 32 bits */
#undef MAC16_16
static OPUS_INLINE opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a,
@ -113,4 +130,22 @@ static OPUS_INLINE opus_val32 MULT16_16_armv5e(opus_val16 a, opus_val16 b)
}
#define MULT16_16(a, b) (MULT16_16_armv5e(a, b))
#ifdef OPUS_ARM_INLINE_MEDIA
#undef SIG2WORD16
static OPUS_INLINE opus_val16 SIG2WORD16_armv6(opus_val32 x)
{
celt_sig res;
__asm__(
"#SIG2WORD16\n\t"
"ssat %0, #16, %1, ASR #12\n\t"
: "=r"(res)
: "r"(x+2048)
);
return EXTRACT16(res);
}
#define SIG2WORD16(x) (SIG2WORD16_armv6(x))
#endif /* OPUS_ARM_INLINE_MEDIA */
#endif

View File

@ -0,0 +1,60 @@
/* Copyright (c) 2015 Xiph.Org Foundation
Written by Viswanath Puttagunta */
/**
@file arm_mdct.h
@brief ARM Neon Intrinsic optimizations for mdct using NE10 library
*/
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if !defined(MDCT_ARM_H)
#define MDCT_ARM_H
#include "config.h"
#include "mdct.h"
#if defined(HAVE_ARM_NE10)
/** Compute a forward MDCT and scale by 4/N, trashes the input array */
void clt_mdct_forward_neon(const mdct_lookup *l, kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window, int overlap,
int shift, int stride, int arch);
void clt_mdct_backward_neon(const mdct_lookup *l, kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window, int overlap,
int shift, int stride, int arch);
#if !defined(OPUS_HAVE_RTCD)
#define OVERRIDE_OPUS_MDCT (1)
#define clt_mdct_forward(_l, _in, _out, _window, _int, _shift, _stride, _arch) \
clt_mdct_forward_neon(_l, _in, _out, _window, _int, _shift, _stride, _arch)
#define clt_mdct_backward(_l, _in, _out, _window, _int, _shift, _stride, _arch) \
clt_mdct_backward_neon(_l, _in, _out, _window, _int, _shift, _stride, _arch)
#endif /* OPUS_HAVE_RTCD */
#endif /* HAVE_ARM_NE10 */
#endif

View File

@ -46,12 +46,81 @@ opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch);
# endif
# if !defined(OPUS_HAVE_RTCD)
# if defined(OPUS_HAVE_RTCD) && \
((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \
(defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \
(defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))
extern opus_val32
(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int, int);
# define OVERRIDE_PITCH_XCORR (1)
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
xcorr, len, max_pitch))
# elif defined(OPUS_ARM_PRESUME_EDSP) || \
defined(OPUS_ARM_PRESUME_MEDIA) || \
defined(OPUS_ARM_PRESUME_NEON)
# define OVERRIDE_PITCH_XCORR (1)
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch))
# endif
# endif
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
void xcorr_kernel_neon_fixed(
const opus_val16 *x,
const opus_val16 *y,
opus_val32 sum[4],
int len);
# endif
# if defined(OPUS_HAVE_RTCD) && \
(defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *y,
opus_val32 sum[4],
int len);
# define OVERRIDE_XCORR_KERNEL (1)
# define xcorr_kernel(x, y, sum, len, arch) \
((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len))
# elif defined(OPUS_ARM_PRESUME_NEON_INTR)
# define OVERRIDE_XCORR_KERNEL (1)
# define xcorr_kernel(x, y, sum, len, arch) \
((void)arch, xcorr_kernel_neon_fixed(x, y, sum, len))
# endif
#else /* Start !FIXED_POINT */
/* Float case */
#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch);
#endif
# if defined(OPUS_HAVE_RTCD) && \
(defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
extern void
(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int, int);
# define OVERRIDE_PITCH_XCORR (1)
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
xcorr, len, max_pitch))
# elif defined(OPUS_ARM_PRESUME_NEON_INTR)
# define OVERRIDE_PITCH_XCORR (1)
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((void)(arch),celt_pitch_xcorr_float_neon(_x, _y, xcorr, len, max_pitch))
# endif
#endif /* end !FIXED_POINT */
#endif

View File

@ -92,11 +92,11 @@ static int bitexact_log2tan(int isin,int icos)
#ifdef FIXED_POINT
/* Compute the amplitude (sqrt energy) in each of the bands */
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M)
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM)
{
int i, c, N;
const opus_int16 *eBands = m->eBands;
N = M*m->shortMdctSize;
N = m->shortMdctSize<<LM;
c=0; do {
for (i=0;i<end;i++)
{
@ -104,18 +104,23 @@ void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *band
opus_val32 maxval=0;
opus_val32 sum = 0;
j=M*eBands[i]; do {
maxval = MAX32(maxval, X[j+c*N]);
maxval = MAX32(maxval, -X[j+c*N]);
} while (++j<M*eBands[i+1]);
maxval = celt_maxabs32(&X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM);
if (maxval > 0)
{
int shift = celt_ilog2(maxval)-10;
j=M*eBands[i]; do {
sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j+c*N],shift)),
EXTRACT16(VSHR32(X[j+c*N],shift)));
} while (++j<M*eBands[i+1]);
int shift = celt_ilog2(maxval) - 14 + (((m->logN[i]>>BITRES)+LM+1)>>1);
j=eBands[i]<<LM;
if (shift>0)
{
do {
sum = MAC16_16(sum, EXTRACT16(SHR32(X[j+c*N],shift)),
EXTRACT16(SHR32(X[j+c*N],shift)));
} while (++j<eBands[i+1]<<LM);
} else {
do {
sum = MAC16_16(sum, EXTRACT16(SHL32(X[j+c*N],-shift)),
EXTRACT16(SHL32(X[j+c*N],-shift)));
} while (++j<eBands[i+1]<<LM);
}
/* We're adding one here to ensure the normalized band isn't larger than unity norm */
bandE[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift);
} else {
@ -150,18 +155,16 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel
#else /* FIXED_POINT */
/* Compute the amplitude (sqrt energy) in each of the bands */
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M)
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM)
{
int i, c, N;
const opus_int16 *eBands = m->eBands;
N = M*m->shortMdctSize;
N = m->shortMdctSize<<LM;
c=0; do {
for (i=0;i<end;i++)
{
int j;
opus_val32 sum = 1e-27f;
for (j=M*eBands[i];j<M*eBands[i+1];j++)
sum += X[j+c*N]*X[j+c*N];
opus_val32 sum;
sum = 1e-27f + celt_inner_prod_c(&X[c*N+(eBands[i]<<LM)], &X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM);
bandE[i+c*m->nbEBands] = celt_sqrt(sum);
/*printf ("%f ", bandE[i+c*m->nbEBands]);*/
}
@ -190,74 +193,80 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel
/* De-normalise the energy to produce the synthesis from the unit-energy bands */
void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start, int end, int C, int M)
celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start,
int end, int M, int downsample, int silence)
{
int i, c, N;
int i, N;
int bound;
celt_sig * OPUS_RESTRICT f;
const celt_norm * OPUS_RESTRICT x;
const opus_int16 *eBands = m->eBands;
N = M*m->shortMdctSize;
celt_assert2(C<=2, "denormalise_bands() not implemented for >2 channels");
c=0; do {
celt_sig * OPUS_RESTRICT f;
const celt_norm * OPUS_RESTRICT x;
f = freq+c*N;
x = X+c*N+M*eBands[start];
for (i=0;i<M*eBands[start];i++)
*f++ = 0;
for (i=start;i<end;i++)
{
int j, band_end;
opus_val16 g;
opus_val16 lg;
bound = M*eBands[end];
if (downsample!=1)
bound = IMIN(bound, N/downsample);
if (silence)
{
bound = 0;
start = end = 0;
}
f = freq;
x = X+M*eBands[start];
for (i=0;i<M*eBands[start];i++)
*f++ = 0;
for (i=start;i<end;i++)
{
int j, band_end;
opus_val16 g;
opus_val16 lg;
#ifdef FIXED_POINT
int shift;
int shift;
#endif
j=M*eBands[i];
band_end = M*eBands[i+1];
lg = ADD16(bandLogE[i+c*m->nbEBands], SHL16((opus_val16)eMeans[i],6));
j=M*eBands[i];
band_end = M*eBands[i+1];
lg = SATURATE16(ADD32(bandLogE[i], SHL32((opus_val32)eMeans[i],6)));
#ifndef FIXED_POINT
g = celt_exp2(lg);
g = celt_exp2(MIN32(32.f, lg));
#else
/* Handle the integer part of the log energy */
shift = 16-(lg>>DB_SHIFT);
if (shift>31)
/* Handle the integer part of the log energy */
shift = 16-(lg>>DB_SHIFT);
if (shift>31)
{
shift=0;
g=0;
} else {
/* Handle the fractional part. */
g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1));
}
/* Handle extreme gains with negative shift. */
if (shift<0)
{
/* For shift <= -2 and g > 16384 we'd be likely to overflow, so we're
capping the gain here, which is equivalent to a cap of 18 on lg.
This shouldn't trigger unless the bitstream is already corrupted. */
if (shift <= -2)
{
shift=0;
g=0;
} else {
/* Handle the fractional part. */
g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1));
g = 16384;
shift = -2;
}
/* Handle extreme gains with negative shift. */
if (shift<0)
{
/* For shift < -2 we'd be likely to overflow, so we're capping
the gain here. This shouldn't happen unless the bitstream is
already corrupted. */
if (shift < -2)
{
g = 32767;
shift = -2;
}
do {
*f++ = SHL32(MULT16_16(*x++, g), -shift);
} while (++j<band_end);
} else
do {
*f++ = SHL32(MULT16_16(*x++, g), -shift);
} while (++j<band_end);
} else
#endif
/* Be careful of the fixed-point "else" just above when changing this code */
do {
*f++ = SHR32(MULT16_16(*x++, g), shift);
} while (++j<band_end);
}
celt_assert(start <= end);
for (i=M*eBands[end];i<N;i++)
*f++ = 0;
} while (++c<C);
}
celt_assert(start <= end);
OPUS_CLEAR(&freq[bound], N-bound);
}
/* This prevents energy collapse for transients with multiple short MDCTs */
void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size,
int start, int end, opus_val16 *logE, opus_val16 *prev1logE,
opus_val16 *prev2logE, int *pulses, opus_uint32 seed)
int start, int end, const opus_val16 *logE, const opus_val16 *prev1logE,
const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed, int arch)
{
int c, i, j, k;
for (i=start;i<end;i++)
@ -272,7 +281,8 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas
N0 = m->eBands[i+1]-m->eBands[i];
/* depth in 1/8 bits */
depth = (1+pulses[i])/((m->eBands[i+1]-m->eBands[i])<<LM);
celt_assert(pulses[i]>=0);
depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM;
#ifdef FIXED_POINT
thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1);
@ -345,12 +355,36 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas
}
/* We just added some energy, so we need to renormalise */
if (renormalize)
renormalise_vector(X, N0<<LM, Q15ONE);
renormalise_vector(X, N0<<LM, Q15ONE, arch);
} while (++c<C);
}
}
static void intensity_stereo(const CELTMode *m, celt_norm *X, celt_norm *Y, const celt_ener *bandE, int bandID, int N)
/* Compute the weights to use for optimizing normalized distortion across
channels. We use the amplitude to weight square distortion, which means
that we use the square root of the value we would have been using if we
wanted to minimize the MSE in the non-normalized domain. This roughly
corresponds to some quick-and-dirty perceptual experiments I ran to
measure inter-aural masking (there doesn't seem to be any published data
on the topic). */
static void compute_channel_weights(celt_ener Ex, celt_ener Ey, opus_val16 w[2])
{
celt_ener minE;
#if FIXED_POINT
int shift;
#endif
minE = MIN32(Ex, Ey);
/* Adjustment to make the weights a bit more conservative. */
Ex = ADD32(Ex, minE/3);
Ey = ADD32(Ey, minE/3);
#if FIXED_POINT
shift = celt_ilog2(EPSILON+MAX32(Ex, Ey))-14;
#endif
w[0] = VSHR32(Ex, shift);
w[1] = VSHR32(Ey, shift);
}
static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, const celt_norm * OPUS_RESTRICT Y, const celt_ener *bandE, int bandID, int N)
{
int i = bandID;
int j;
@ -370,25 +404,25 @@ static void intensity_stereo(const CELTMode *m, celt_norm *X, celt_norm *Y, cons
celt_norm r, l;
l = X[j];
r = Y[j];
X[j] = MULT16_16_Q14(a1,l) + MULT16_16_Q14(a2,r);
X[j] = EXTRACT16(SHR32(MAC16_16(MULT16_16(a1, l), a2, r), 14));
/* Side is not encoded, no need to calculate */
}
}
static void stereo_split(celt_norm *X, celt_norm *Y, int N)
static void stereo_split(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, int N)
{
int j;
for (j=0;j<N;j++)
{
celt_norm r, l;
l = MULT16_16_Q15(QCONST16(.70710678f,15), X[j]);
r = MULT16_16_Q15(QCONST16(.70710678f,15), Y[j]);
X[j] = l+r;
Y[j] = r-l;
opus_val32 r, l;
l = MULT16_16(QCONST16(.70710678f, 15), X[j]);
r = MULT16_16(QCONST16(.70710678f, 15), Y[j]);
X[j] = EXTRACT16(SHR32(ADD32(l, r), 15));
Y[j] = EXTRACT16(SHR32(SUB32(r, l), 15));
}
}
static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N)
static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, opus_val16 mid, int N, int arch)
{
int j;
opus_val32 xp=0, side=0;
@ -400,17 +434,16 @@ static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N)
opus_val32 t, lgain, rgain;
/* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
dual_inner_prod(Y, X, Y, N, &xp, &side);
dual_inner_prod(Y, X, Y, N, &xp, &side, arch);
/* Compensating for the mid normalization */
xp = MULT16_32_Q15(mid, xp);
/* mid and side are in Q15, not Q14 like X and Y */
mid2 = SHR32(mid, 1);
mid2 = SHR16(mid, 1);
El = MULT16_16(mid2, mid2) + side - 2*xp;
Er = MULT16_16(mid2, mid2) + side + 2*xp;
if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28))
{
for (j=0;j<N;j++)
Y[j] = X[j];
OPUS_COPY(Y, X, N);
return;
}
@ -434,7 +467,7 @@ static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N)
{
celt_norm r, l;
/* Apply mid scaling (side is already scaled) */
l = MULT16_16_Q15(mid, X[j]);
l = MULT16_16_P15(mid, X[j]);
r = Y[j];
X[j] = EXTRACT16(PSHR32(MULT16_16(lgain, SUB16(l,r)), kl+1));
Y[j] = EXTRACT16(PSHR32(MULT16_16(rgain, ADD16(l,r)), kr+1));
@ -442,7 +475,7 @@ static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N)
}
/* Decide whether we should spread the pulses in the current frame */
int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
int last_decision, int *hf_average, int *tapset_decision, int update_hf,
int end, int C, int M)
{
@ -463,7 +496,7 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
{
int j, N, tmp=0;
int tcount[3] = {0,0,0};
celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0;
const celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0;
N = M*(eBands[i+1]-eBands[i]);
if (N<=8)
continue;
@ -483,7 +516,7 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
/* Only include four last bands (8 kHz and up) */
if (i>m->nbEBands-4)
hf_sum += 32*(tcount[1]+tcount[0])/N;
hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N);
tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N);
sum += tmp*256;
nbBands++;
@ -493,7 +526,7 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
if (update_hf)
{
if (hf_sum)
hf_sum /= C*(4-m->nbEBands+end);
hf_sum = celt_udiv(hf_sum, C*(4-m->nbEBands+end));
*hf_average = (*hf_average+hf_sum)>>1;
hf_sum = *hf_average;
if (*tapset_decision==2)
@ -509,7 +542,8 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
}
/*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/
celt_assert(nbBands>0); /* end has to be non-zero */
sum /= nbBands;
celt_assert(sum>=0);
sum = celt_udiv(sum, nbBands);
/* Recursive averaging */
sum = (sum+*average)>>1;
*average = sum;
@ -567,8 +601,7 @@ static void deinterleave_hadamard(celt_norm *X, int N0, int stride, int hadamard
for (j=0;j<N0;j++)
tmp[i*N0+j] = X[j*stride+i];
}
for (j=0;j<N;j++)
X[j] = tmp[j];
OPUS_COPY(X, tmp, N);
RESTORE_STACK;
}
@ -591,8 +624,7 @@ static void interleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
for (j=0;j<N0;j++)
tmp[j*stride+i] = X[i*N0+j];
}
for (j=0;j<N;j++)
X[j] = tmp[j];
OPUS_COPY(X, tmp, N);
RESTORE_STACK;
}
@ -603,11 +635,11 @@ void haar1(celt_norm *X, int N0, int stride)
for (i=0;i<stride;i++)
for (j=0;j<N0;j++)
{
celt_norm tmp1, tmp2;
tmp1 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*2*j+i]);
tmp2 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]);
X[stride*2*j+i] = tmp1 + tmp2;
X[stride*(2*j+1)+i] = tmp1 - tmp2;
opus_val32 tmp1, tmp2;
tmp1 = MULT16_16(QCONST16(.70710678f,15), X[stride*2*j+i]);
tmp2 = MULT16_16(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]);
X[stride*2*j+i] = EXTRACT16(PSHR32(ADD32(tmp1, tmp2), 15));
X[stride*(2*j+1)+i] = EXTRACT16(PSHR32(SUB32(tmp1, tmp2), 15));
}
}
@ -622,7 +654,8 @@ static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo)
/* The upper limit ensures that in a stereo split with itheta==16384, we'll
always have enough bits left over to code at least one pulse in the
side; otherwise it would collapse, since it doesn't get folded. */
qb = IMIN(b-pulse_cap-(4<<BITRES), (b+N2*offset)/N2);
qb = celt_sudiv(b+N2*offset, N2);
qb = IMIN(b-pulse_cap-(4<<BITRES), qb);
qb = IMIN(8<<BITRES, qb);
@ -638,6 +671,7 @@ static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo)
struct band_ctx {
int encode;
int resynth;
const CELTMode *m;
int i;
int intensity;
@ -647,6 +681,9 @@ struct band_ctx {
opus_int32 remaining_bits;
const celt_ener *bandE;
opus_uint32 seed;
int arch;
int theta_round;
int disable_inv;
};
struct split_ctx {
@ -698,14 +735,27 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
side and mid. With just that parameter, we can re-scale both
mid and side because we know that 1) they have unit norm and
2) they are orthogonal. */
itheta = stereo_itheta(X, Y, stereo, N);
itheta = stereo_itheta(X, Y, stereo, N, ctx->arch);
}
tell = ec_tell_frac(ec);
if (qn!=1)
{
if (encode)
itheta = (itheta*qn+8192)>>14;
{
if (!stereo || ctx->theta_round == 0)
{
itheta = (itheta*(opus_int32)qn+8192)>>14;
} else {
int down;
/* Bias quantization towards itheta=0 and itheta=16384. */
int bias = itheta > 8192 ? 32767/qn : -32767/qn;
down = IMIN(qn-1, IMAX(0, (itheta*(opus_int32)qn + bias)>>14));
if (ctx->theta_round < 0)
itheta = down;
else
itheta = down+1;
}
}
/* Entropy coding of the angle. We use a uniform pdf for the
time split, a step for stereo, and a triangular one for the rest. */
if (stereo && N>2)
@ -769,7 +819,8 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
ec_dec_update(ec, fl, fl+fs, ft);
}
}
itheta = (opus_int32)itheta*16384/qn;
celt_assert(itheta>=0);
itheta = celt_udiv((opus_int32)itheta*16384, qn);
if (encode && stereo)
{
if (itheta==0)
@ -782,7 +833,7 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
} else if (stereo) {
if (encode)
{
inv = itheta > 8192;
inv = itheta > 8192 && !ctx->disable_inv;
if (inv)
{
int j;
@ -799,6 +850,9 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
inv = ec_dec_bit_logp(ec, 2);
} else
inv = 0;
/* inv flag override to avoid problems with downmixing. */
if (ctx->disable_inv)
inv = 0;
itheta = 0;
}
qalloc = ec_tell_frac(ec) - tell;
@ -834,11 +888,6 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y, int b,
celt_norm *lowband_out)
{
#ifdef RESYNTH
int resynth = 1;
#else
int resynth = !ctx->encode;
#endif
int c;
int stereo;
celt_norm *x = X;
@ -863,7 +912,7 @@ static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y,
ctx->remaining_bits -= 1<<BITRES;
b-=1<<BITRES;
}
if (resynth)
if (ctx->resynth)
x[0] = sign ? -NORM_SCALING : NORM_SCALING;
x = Y;
} while (++c<1+stereo);
@ -888,11 +937,6 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
int B0=B;
opus_val16 mid=0, side=0;
unsigned cm=0;
#ifdef RESYNTH
int resynth = 1;
#else
int resynth = !ctx->encode;
#endif
celt_norm *Y=NULL;
int encode;
const CELTMode *m;
@ -924,8 +968,7 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
fill = (fill&1)|(fill<<1);
B = (B+1)>>1;
compute_theta(ctx, &sctx, X, Y, N, &b, B, B0,
LM, 0, &fill);
compute_theta(ctx, &sctx, X, Y, N, &b, B, B0, LM, 0, &fill);
imid = sctx.imid;
iside = sctx.iside;
delta = sctx.delta;
@ -959,24 +1002,20 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
rebalance = ctx->remaining_bits;
if (mbits >= sbits)
{
cm = quant_partition(ctx, X, N, mbits, B,
lowband, LM,
cm = quant_partition(ctx, X, N, mbits, B, lowband, LM,
MULT16_16_P15(gain,mid), fill);
rebalance = mbits - (rebalance-ctx->remaining_bits);
if (rebalance > 3<<BITRES && itheta!=0)
sbits += rebalance - (3<<BITRES);
cm |= quant_partition(ctx, Y, N, sbits, B,
next_lowband2, LM,
cm |= quant_partition(ctx, Y, N, sbits, B, next_lowband2, LM,
MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
} else {
cm = quant_partition(ctx, Y, N, sbits, B,
next_lowband2, LM,
cm = quant_partition(ctx, Y, N, sbits, B, next_lowband2, LM,
MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
rebalance = sbits - (rebalance-ctx->remaining_bits);
if (rebalance > 3<<BITRES && itheta!=16384)
mbits += rebalance - (3<<BITRES);
cm |= quant_partition(ctx, X, N, mbits, B,
lowband, LM,
cm |= quant_partition(ctx, X, N, mbits, B, lowband, LM,
MULT16_16_P15(gain,mid), fill);
}
} else {
@ -1001,18 +1040,14 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
/* Finally do the actual quantization */
if (encode)
{
cm = alg_quant(X, N, K, spread, B, ec
#ifdef RESYNTH
, gain
#endif
);
cm = alg_quant(X, N, K, spread, B, ec, gain, ctx->resynth, ctx->arch);
} else {
cm = alg_unquant(X, N, K, spread, B, ec, gain);
}
} else {
/* If there's no pulse, fill the band anyway */
int j;
if (resynth)
if (ctx->resynth)
{
unsigned cm_mask;
/* B can be as large as 16, so this shift might overflow an int on a
@ -1021,8 +1056,7 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
fill &= cm_mask;
if (!fill)
{
for (j=0;j<N;j++)
X[j] = 0;
OPUS_CLEAR(X, N);
} else {
if (lowband == NULL)
{
@ -1046,7 +1080,7 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
}
cm = fill;
}
renormalise_vector(X, N, gain);
renormalise_vector(X, N, gain, ctx->arch);
}
}
}
@ -1070,11 +1104,6 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
int recombine=0;
int longBlocks;
unsigned cm=0;
#ifdef RESYNTH
int resynth = 1;
#else
int resynth = !ctx->encode;
#endif
int k;
int encode;
int tf_change;
@ -1084,7 +1113,7 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
longBlocks = B0==1;
N_B /= B;
N_B = celt_udiv(N_B, B);
/* Special case for one sample */
if (N==1)
@ -1098,9 +1127,7 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1))
{
int j;
for (j=0;j<N;j++)
lowband_scratch[j] = lowband[j];
OPUS_COPY(lowband_scratch, lowband, N);
lowband = lowband_scratch;
}
@ -1143,11 +1170,10 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
deinterleave_hadamard(lowband, N_B>>recombine, B0<<recombine, longBlocks);
}
cm = quant_partition(ctx, X, N, b, B, lowband,
LM, gain, fill);
cm = quant_partition(ctx, X, N, b, B, lowband, LM, gain, fill);
/* This code is used by the decoder and by the resynthesis-enabled encoder */
if (resynth)
if (ctx->resynth)
{
/* Undo the sample reorganization going from time order to frequency order */
if (B0>1)
@ -1200,11 +1226,6 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
int inv = 0;
opus_val16 mid=0, side=0;
unsigned cm=0;
#ifdef RESYNTH
int resynth = 1;
#else
int resynth = !ctx->encode;
#endif
int mbits, sbits, delta;
int itheta;
int qalloc;
@ -1224,8 +1245,7 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
orig_fill = fill;
compute_theta(ctx, &sctx, X, Y, N, &b, B, B,
LM, 1, &fill);
compute_theta(ctx, &sctx, X, Y, N, &b, B, B, LM, 1, &fill);
inv = sctx.inv;
imid = sctx.imid;
iside = sctx.iside;
@ -1273,13 +1293,13 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
sign = 1-2*sign;
/* We use orig_fill here because we want to fold the side, but if
itheta==16384, we'll have cleared the low bits of fill. */
cm = quant_band(ctx, x2, N, mbits, B, lowband,
LM, lowband_out, Q15ONE, lowband_scratch, orig_fill);
cm = quant_band(ctx, x2, N, mbits, B, lowband, LM, lowband_out, Q15ONE,
lowband_scratch, orig_fill);
/* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),
and there's no need to worry about mixing with the other channel. */
y2[0] = -sign*x2[1];
y2[1] = sign*x2[0];
if (resynth)
if (ctx->resynth)
{
celt_norm tmp;
X[0] = MULT16_16_Q15(mid, X[0]);
@ -1306,41 +1326,35 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
{
/* In stereo mode, we do not apply a scaling to the mid because we need the normalized
mid for folding later. */
cm = quant_band(ctx, X, N, mbits, B,
lowband, LM, lowband_out,
Q15ONE, lowband_scratch, fill);
cm = quant_band(ctx, X, N, mbits, B, lowband, LM, lowband_out, Q15ONE,
lowband_scratch, fill);
rebalance = mbits - (rebalance-ctx->remaining_bits);
if (rebalance > 3<<BITRES && itheta!=0)
sbits += rebalance - (3<<BITRES);
/* For a stereo split, the high bits of fill are always zero, so no
folding will be done to the side. */
cm |= quant_band(ctx, Y, N, sbits, B,
NULL, LM, NULL,
side, NULL, fill>>B);
cm |= quant_band(ctx, Y, N, sbits, B, NULL, LM, NULL, side, NULL, fill>>B);
} else {
/* For a stereo split, the high bits of fill are always zero, so no
folding will be done to the side. */
cm = quant_band(ctx, Y, N, sbits, B,
NULL, LM, NULL,
side, NULL, fill>>B);
cm = quant_band(ctx, Y, N, sbits, B, NULL, LM, NULL, side, NULL, fill>>B);
rebalance = sbits - (rebalance-ctx->remaining_bits);
if (rebalance > 3<<BITRES && itheta!=16384)
mbits += rebalance - (3<<BITRES);
/* In stereo mode, we do not apply a scaling to the mid because we need the normalized
mid for folding later. */
cm |= quant_band(ctx, X, N, mbits, B,
lowband, LM, lowband_out,
Q15ONE, lowband_scratch, fill);
cm |= quant_band(ctx, X, N, mbits, B, lowband, LM, lowband_out, Q15ONE,
lowband_scratch, fill);
}
}
/* This code is used by the decoder and by the resynthesis-enabled encoder */
if (resynth)
if (ctx->resynth)
{
if (N!=2)
stereo_merge(X, Y, mid, N);
stereo_merge(X, Y, mid, N, ctx->arch);
if (inv)
{
int j;
@ -1351,17 +1365,38 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
return cm;
}
static void special_hybrid_folding(const CELTMode *m, celt_norm *norm, celt_norm *norm2, int start, int M, int dual_stereo)
{
int n1, n2;
const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
n1 = M*(eBands[start+1]-eBands[start]);
n2 = M*(eBands[start+2]-eBands[start+1]);
/* Duplicate enough of the first band folding data to be able to fold the second band.
Copies no data for CELT-only mode. */
OPUS_COPY(&norm[n1], &norm[2*n1 - n2], n2-n1);
if (dual_stereo)
OPUS_COPY(&norm2[n1], &norm2[2*n1 - n2], n2-n1);
}
void quant_all_bands(int encode, const CELTMode *m, int start, int end,
celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses,
int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res,
opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int LM, int codedBands, opus_uint32 *seed)
celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks,
const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits,
opus_int32 balance, ec_ctx *ec, int LM, int codedBands,
opus_uint32 *seed, int complexity, int arch, int disable_inv)
{
int i;
opus_int32 remaining_bits;
const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
celt_norm * OPUS_RESTRICT norm, * OPUS_RESTRICT norm2;
VARDECL(celt_norm, _norm);
VARDECL(celt_norm, _lowband_scratch);
VARDECL(celt_norm, X_save);
VARDECL(celt_norm, Y_save);
VARDECL(celt_norm, X_save2);
VARDECL(celt_norm, Y_save2);
VARDECL(celt_norm, norm_save2);
int resynth_alloc;
celt_norm *lowband_scratch;
int B;
int M;
@ -1369,10 +1404,11 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
int update_lowband = 1;
int C = Y_ != NULL ? 2 : 1;
int norm_offset;
int theta_rdo = encode && Y_!=NULL && !dual_stereo && complexity>=8;
#ifdef RESYNTH
int resynth = 1;
#else
int resynth = !encode;
int resynth = !encode || theta_rdo;
#endif
struct band_ctx ctx;
SAVE_STACK;
@ -1385,9 +1421,24 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
ALLOC(_norm, C*(M*eBands[m->nbEBands-1]-norm_offset), celt_norm);
norm = _norm;
norm2 = norm + M*eBands[m->nbEBands-1]-norm_offset;
/* We can use the last band as scratch space because we don't need that
scratch space for the last band. */
lowband_scratch = X_+M*eBands[m->nbEBands-1];
/* For decoding, we can use the last band as scratch space because we don't need that
scratch space for the last band and we don't care about the data there until we're
decoding the last band. */
if (encode && resynth)
resynth_alloc = M*(eBands[m->nbEBands]-eBands[m->nbEBands-1]);
else
resynth_alloc = ALLOC_NONE;
ALLOC(_lowband_scratch, resynth_alloc, celt_norm);
if (encode && resynth)
lowband_scratch = _lowband_scratch;
else
lowband_scratch = X_+M*eBands[m->nbEBands-1];
ALLOC(X_save, resynth_alloc, celt_norm);
ALLOC(Y_save, resynth_alloc, celt_norm);
ALLOC(X_save2, resynth_alloc, celt_norm);
ALLOC(Y_save2, resynth_alloc, celt_norm);
ALLOC(norm_save2, resynth_alloc, celt_norm);
lowband_offset = 0;
ctx.bandE = bandE;
@ -1397,6 +1448,10 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
ctx.m = m;
ctx.seed = *seed;
ctx.spread = spread;
ctx.arch = arch;
ctx.disable_inv = disable_inv;
ctx.resynth = resynth;
ctx.theta_round = 0;
for (i=start;i<end;i++)
{
opus_int32 tell;
@ -1428,14 +1483,21 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
ctx.remaining_bits = remaining_bits;
if (i <= codedBands-1)
{
curr_balance = balance / IMIN(3, codedBands-i);
curr_balance = celt_sudiv(balance, IMIN(3, codedBands-i));
b = IMAX(0, IMIN(16383, IMIN(remaining_bits+1,pulses[i]+curr_balance)));
} else {
b = 0;
}
#ifdef ENABLE_UPDATE_DRAFT
if (resynth && (M*eBands[i]-N >= M*eBands[start] || i==start+1) && (update_lowband || lowband_offset==0))
lowband_offset = i;
if (i == start+1)
special_hybrid_folding(m, norm, norm2, start, M, dual_stereo);
#else
if (resynth && M*eBands[i]-N >= M*eBands[start] && (update_lowband || lowband_offset==0))
lowband_offset = i;
#endif
tf_change = tf_res[i];
ctx.tf_change = tf_change;
@ -1446,7 +1508,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
Y = norm;
lowband_scratch = NULL;
}
if (i==end-1)
if (last && !theta_rdo)
lowband_scratch = NULL;
/* Get a conservative estimate of the collapse_mask's for the bands we're
@ -1461,7 +1523,11 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
fold_start = lowband_offset;
while(M*eBands[--fold_start] > effective_lowband+norm_offset);
fold_end = lowband_offset-1;
#ifdef ENABLE_UPDATE_DRAFT
while(++fold_end < i && M*eBands[fold_end] < effective_lowband+norm_offset+N);
#else
while(M*eBands[++fold_end] < effective_lowband+norm_offset+N);
#endif
x_cm = y_cm = 0;
fold_i = fold_start; do {
x_cm |= collapse_masks[fold_i*C+0];
@ -1494,13 +1560,77 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
} else {
if (Y!=NULL)
{
x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, x_cm|y_cm);
if (theta_rdo && i < intensity)
{
ec_ctx ec_save, ec_save2;
struct band_ctx ctx_save, ctx_save2;
opus_val32 dist0, dist1;
unsigned cm, cm2;
int nstart_bytes, nend_bytes, save_bytes;
unsigned char *bytes_buf;
unsigned char bytes_save[1275];
opus_val16 w[2];
compute_channel_weights(bandE[i], bandE[i+m->nbEBands], w);
/* Make a copy. */
cm = x_cm|y_cm;
ec_save = *ec;
ctx_save = ctx;
OPUS_COPY(X_save, X, N);
OPUS_COPY(Y_save, Y, N);
/* Encode and round down. */
ctx.theta_round = -1;
x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm);
dist0 = MULT16_32_Q15(w[0], celt_inner_prod(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod(Y_save, Y, N, arch));
/* Save first result. */
cm2 = x_cm;
ec_save2 = *ec;
ctx_save2 = ctx;
OPUS_COPY(X_save2, X, N);
OPUS_COPY(Y_save2, Y, N);
if (!last)
OPUS_COPY(norm_save2, norm+M*eBands[i]-norm_offset, N);
nstart_bytes = ec_save.offs;
nend_bytes = ec_save.storage;
bytes_buf = ec_save.buf+nstart_bytes;
save_bytes = nend_bytes-nstart_bytes;
OPUS_COPY(bytes_save, bytes_buf, save_bytes);
/* Restore */
*ec = ec_save;
ctx = ctx_save;
OPUS_COPY(X, X_save, N);
OPUS_COPY(Y, Y_save, N);
if (i == start+1)
special_hybrid_folding(m, norm, norm2, start, M, dual_stereo);
/* Encode and round up. */
ctx.theta_round = 1;
x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm);
dist1 = MULT16_32_Q15(w[0], celt_inner_prod(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod(Y_save, Y, N, arch));
if (dist0 >= dist1) {
x_cm = cm2;
*ec = ec_save2;
ctx = ctx_save2;
OPUS_COPY(X, X_save2, N);
OPUS_COPY(Y, Y_save2, N);
if (!last)
OPUS_COPY(norm+M*eBands[i]-norm_offset, norm_save2, N);
OPUS_COPY(bytes_buf, bytes_save, save_bytes);
}
} else {
ctx.theta_round = 0;
x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, x_cm|y_cm);
}
} else {
x_cm = quant_band(&ctx, X, N, b, B,
effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm|y_cm);
last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm|y_cm);
}
y_cm = x_cm;
}

View File

@ -41,7 +41,7 @@
* @param X Spectrum
* @param bandE Square root of the energy for each band (returned)
*/
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M);
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM);
/*void compute_noise_energies(const CELTMode *m, const celt_sig *X, const opus_val16 *tonality, celt_ener *bandE);*/
@ -59,14 +59,15 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel
* @param bandE Square root of the energy for each band
*/
void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandE, int start, int end, int C, int M);
celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandE, int start,
int end, int M, int downsample, int silence);
#define SPREAD_NONE (0)
#define SPREAD_LIGHT (1)
#define SPREAD_NORMAL (2)
#define SPREAD_AGGRESSIVE (3)
int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
int last_decision, int *hf_average, int *tapset_decision, int update_hf,
int end, int C, int M);
@ -97,15 +98,20 @@ void haar1(celt_norm *X, int N0, int stride);
* @param LM log2() of the number of 2.5 subframes in the frame
* @param codedBands Last band to receive bits + 1
* @param seed Random generator seed
* @param arch Run-time architecture (see opus_select_arch())
*/
void quant_all_bands(int encode, const CELTMode *m, int start, int end,
celt_norm * X, celt_norm * Y, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses,
int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res,
opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int M, int codedBands, opus_uint32 *seed);
celt_norm * X, celt_norm * Y, unsigned char *collapse_masks,
const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits,
opus_int32 balance, ec_ctx *ec, int M, int codedBands, opus_uint32 *seed,
int complexity, int arch, int disable_inv);
void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size,
int start, int end, opus_val16 *logE, opus_val16 *prev1logE,
opus_val16 *prev2logE, int *pulses, opus_uint32 seed);
void anti_collapse(const CELTMode *m, celt_norm *X_,
unsigned char *collapse_masks, int LM, int C, int size, int start,
int end, const opus_val16 *logE, const opus_val16 *prev1logE,
const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed,
int arch);
opus_uint32 celt_lcg_rand(opus_uint32 seed);

View File

@ -54,6 +54,10 @@
#define PACKAGE_VERSION "unknown"
#endif
#if defined(MIPSr1_ASM)
#include "mips/celt_mipsr1.h"
#endif
int resampling_factor(opus_int32 rate)
{
@ -85,8 +89,77 @@ int resampling_factor(opus_int32 rate)
return ret;
}
#ifndef OVERRIDE_COMB_FILTER_CONST
static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
#if !defined(OVERRIDE_COMB_FILTER_CONST) || defined(NON_STATIC_COMB_FILTER_CONST_C)
/* This version should be faster on ARM */
#ifdef OPUS_ARM_ASM
#ifndef NON_STATIC_COMB_FILTER_CONST_C
static
#endif
void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
opus_val16 g10, opus_val16 g11, opus_val16 g12)
{
opus_val32 x0, x1, x2, x3, x4;
int i;
x4 = SHL32(x[-T-2], 1);
x3 = SHL32(x[-T-1], 1);
x2 = SHL32(x[-T], 1);
x1 = SHL32(x[-T+1], 1);
for (i=0;i<N-4;i+=5)
{
opus_val32 t;
x0=SHL32(x[i-T+2],1);
t = MAC16_32_Q16(x[i], g10, x2);
t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
t = SATURATE(t, SIG_SAT);
y[i] = t;
x4=SHL32(x[i-T+3],1);
t = MAC16_32_Q16(x[i+1], g10, x1);
t = MAC16_32_Q16(t, g11, ADD32(x0,x2));
t = MAC16_32_Q16(t, g12, ADD32(x4,x3));
t = SATURATE(t, SIG_SAT);
y[i+1] = t;
x3=SHL32(x[i-T+4],1);
t = MAC16_32_Q16(x[i+2], g10, x0);
t = MAC16_32_Q16(t, g11, ADD32(x4,x1));
t = MAC16_32_Q16(t, g12, ADD32(x3,x2));
t = SATURATE(t, SIG_SAT);
y[i+2] = t;
x2=SHL32(x[i-T+5],1);
t = MAC16_32_Q16(x[i+3], g10, x4);
t = MAC16_32_Q16(t, g11, ADD32(x3,x0));
t = MAC16_32_Q16(t, g12, ADD32(x2,x1));
t = SATURATE(t, SIG_SAT);
y[i+3] = t;
x1=SHL32(x[i-T+6],1);
t = MAC16_32_Q16(x[i+4], g10, x3);
t = MAC16_32_Q16(t, g11, ADD32(x2,x4));
t = MAC16_32_Q16(t, g12, ADD32(x1,x0));
t = SATURATE(t, SIG_SAT);
y[i+4] = t;
}
#ifdef CUSTOM_MODES
for (;i<N;i++)
{
opus_val32 t;
x0=SHL32(x[i-T+2],1);
t = MAC16_32_Q16(x[i], g10, x2);
t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
t = SATURATE(t, SIG_SAT);
y[i] = t;
x4=x3;
x3=x2;
x2=x1;
x1=x0;
}
#endif
}
#else
#ifndef NON_STATIC_COMB_FILTER_CONST_C
static
#endif
void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
opus_val16 g10, opus_val16 g11, opus_val16 g12)
{
opus_val32 x0, x1, x2, x3, x4;
@ -102,6 +175,7 @@ static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
+ MULT16_32_Q15(g10,x2)
+ MULT16_32_Q15(g11,ADD32(x1,x3))
+ MULT16_32_Q15(g12,ADD32(x0,x4));
y[i] = SATURATE(y[i], SIG_SAT);
x4=x3;
x3=x2;
x2=x1;
@ -110,10 +184,12 @@ static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
}
#endif
#endif
#ifndef OVERRIDE_comb_filter
void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
const opus_val16 *window, int overlap)
const opus_val16 *window, int overlap, int arch)
{
int i;
/* printf ("%d %d %f %f\n", T0, T1, g0, g1); */
@ -131,16 +207,23 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
OPUS_MOVE(y, x, N);
return;
}
g00 = MULT16_16_Q15(g0, gains[tapset0][0]);
g01 = MULT16_16_Q15(g0, gains[tapset0][1]);
g02 = MULT16_16_Q15(g0, gains[tapset0][2]);
g10 = MULT16_16_Q15(g1, gains[tapset1][0]);
g11 = MULT16_16_Q15(g1, gains[tapset1][1]);
g12 = MULT16_16_Q15(g1, gains[tapset1][2]);
/* When the gain is zero, T0 and/or T1 is set to zero. We need
to have then be at least 2 to avoid processing garbage data. */
T0 = IMAX(T0, COMBFILTER_MINPERIOD);
T1 = IMAX(T1, COMBFILTER_MINPERIOD);
g00 = MULT16_16_P15(g0, gains[tapset0][0]);
g01 = MULT16_16_P15(g0, gains[tapset0][1]);
g02 = MULT16_16_P15(g0, gains[tapset0][2]);
g10 = MULT16_16_P15(g1, gains[tapset1][0]);
g11 = MULT16_16_P15(g1, gains[tapset1][1]);
g12 = MULT16_16_P15(g1, gains[tapset1][2]);
x1 = x[-T1+1];
x2 = x[-T1 ];
x3 = x[-T1-1];
x4 = x[-T1-2];
/* If the filter didn't change, we don't need the overlap */
if (g0==g1 && T0==T1 && tapset0==tapset1)
overlap=0;
for (i=0;i<overlap;i++)
{
opus_val16 f;
@ -153,6 +236,7 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
+ MULT16_32_Q15(MULT16_16_Q15(f,g10),x2)
+ MULT16_32_Q15(MULT16_16_Q15(f,g11),ADD32(x1,x3))
+ MULT16_32_Q15(MULT16_16_Q15(f,g12),ADD32(x0,x4));
y[i] = SATURATE(y[i], SIG_SAT);
x4=x3;
x3=x2;
x2=x1;
@ -168,14 +252,20 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
}
/* Compute the part with the constant filter. */
comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12);
comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12, arch);
}
#endif /* OVERRIDE_comb_filter */
/* TF change table. Positive values mean better frequency resolution (longer
effective window), whereas negative values mean better time resolution
(shorter effective window). The second index is computed as:
4*isTransient + 2*tf_select + per_band_flag */
const signed char tf_select_table[4][8] = {
{0, -1, 0, -1, 0,-1, 0,-1},
{0, -1, 0, -2, 1, 0, 1,-1},
{0, -2, 0, -3, 2, 0, 1,-1},
{0, -2, 0, -3, 3, 0, 1,-1},
/*isTransient=0 isTransient=1 */
{0, -1, 0, -1, 0,-1, 0,-1}, /* 2.5 ms */
{0, -1, 0, -2, 1, 0, 1,-1}, /* 5 ms */
{0, -2, 0, -3, 2, 0, 1,-1}, /* 10 ms */
{0, -2, 0, -3, 3, 0, 1,-1}, /* 20 ms */
};
@ -213,6 +303,9 @@ const char *opus_strerror(int error)
const char *opus_get_version_string(void)
{
return "libopus " PACKAGE_VERSION
/* Applications may rely on the presence of this substring in the version
string to determine if they have a fixed-point or floating-point build
at runtime. */
#ifdef FIXED_POINT
"-fixed"
#endif

View File

@ -57,13 +57,21 @@ typedef struct {
float noisiness;
float activity;
float music_prob;
int bandwidth;
}AnalysisInfo;
int bandwidth;
float activity_probability;
} AnalysisInfo;
typedef struct {
int signalType;
int offset;
} SILKInfo;
#define __celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr)))
#define __celt_check_analysis_ptr(ptr) ((ptr) + ((ptr) - (const AnalysisInfo*)(ptr)))
#define __celt_check_silkinfo_ptr(ptr) ((ptr) + ((ptr) - (const SILKInfo*)(ptr)))
/* Encoder/decoder Requests */
/* Expose this option again when variable framesize actually works */
@ -116,6 +124,9 @@ typedef struct {
#define OPUS_SET_ENERGY_MASK_REQUEST 10026
#define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x)
#define CELT_SET_SILK_INFO_REQUEST 10028
#define CELT_SET_SILK_INFO(x) CELT_SET_SILK_INFO_REQUEST, __celt_check_silkinfo_ptr(x)
/* Encoder stuff */
int celt_encoder_get_size(int channels);
@ -134,7 +145,8 @@ int celt_decoder_get_size(int channels);
int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels);
int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec);
int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data,
int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum);
#define celt_encoder_ctl opus_custom_encoder_ctl
#define celt_decoder_ctl opus_custom_decoder_ctl
@ -200,15 +212,25 @@ void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RES
void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
const opus_val16 *window, int overlap);
const opus_val16 *window, int overlap, int arch);
#ifdef NON_STATIC_COMB_FILTER_CONST_C
void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
opus_val16 g10, opus_val16 g11, opus_val16 g12);
#endif
#ifndef OVERRIDE_COMB_FILTER_CONST
# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
((void)(arch),comb_filter_const_c(y, x, T, N, g10, g11, g12))
#endif
void init_caps(const CELTMode *m,int *cap,int LM,int C);
#ifdef RESYNTH
void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch);
void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X,
celt_sig * OPUS_RESTRICT out_mem[], int C, int LM);
void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem);
void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
opus_val16 *oldBandE, int start, int effEnd, int C, int CC, int isTransient,
int LM, int downsample, int silence);
#endif
#ifdef __cplusplus

View File

@ -51,6 +51,9 @@
#include "celt_lpc.h"
#include "vq.h"
#if defined(SMALL_FOOTPRINT) && defined(FIXED_POINT)
#define NORM_ALIASING_HACK
#endif
/**********************************************************************/
/* */
/* DECODER */
@ -70,6 +73,7 @@ struct OpusCustomDecoder {
int downsample;
int start, end;
int signalling;
int disable_inv;
int arch;
/* Everything beyond this point gets cleared on a reset */
@ -79,6 +83,7 @@ struct OpusCustomDecoder {
int error;
int last_pitch_index;
int loss_count;
int skip_plc;
int postfilter_period;
int postfilter_period_old;
opus_val16 postfilter_gain;
@ -159,10 +164,13 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_init(CELTDecoder *st, const CELTMod
st->start = 0;
st->end = st->mode->effEBands;
st->signalling = 1;
#ifdef ENABLE_UPDATE_DRAFT
st->disable_inv = channels == 1;
#else
st->disable_inv = 0;
#endif
st->arch = opus_select_arch();
st->loss_count = 0;
opus_custom_decoder_ctl(st, OPUS_RESET_STATE);
return OPUS_OK;
@ -175,28 +183,62 @@ void opus_custom_decoder_destroy(CELTDecoder *st)
}
#endif /* CUSTOM_MODES */
static OPUS_INLINE opus_val16 SIG2WORD16(celt_sig x)
#ifndef CUSTOM_MODES
/* Special case for stereo with no downsampling and no accumulation. This is
quite common and we can make it faster by processing both channels in the
same loop, reducing overhead due to the dependency loop in the IIR filter. */
static void deemphasis_stereo_simple(celt_sig *in[], opus_val16 *pcm, int N, const opus_val16 coef0,
celt_sig *mem)
{
#ifdef FIXED_POINT
x = PSHR32(x, SIG_SHIFT);
x = MAX32(x, -32768);
x = MIN32(x, 32767);
return EXTRACT16(x);
#else
return (opus_val16)x;
#endif
celt_sig * OPUS_RESTRICT x0;
celt_sig * OPUS_RESTRICT x1;
celt_sig m0, m1;
int j;
x0=in[0];
x1=in[1];
m0 = mem[0];
m1 = mem[1];
for (j=0;j<N;j++)
{
celt_sig tmp0, tmp1;
/* Add VERY_SMALL to x[] first to reduce dependency chain. */
tmp0 = x0[j] + VERY_SMALL + m0;
tmp1 = x1[j] + VERY_SMALL + m1;
m0 = MULT16_32_Q15(coef0, tmp0);
m1 = MULT16_32_Q15(coef0, tmp1);
pcm[2*j ] = SCALEOUT(SIG2WORD16(tmp0));
pcm[2*j+1] = SCALEOUT(SIG2WORD16(tmp1));
}
mem[0] = m0;
mem[1] = m1;
}
#endif
#ifndef RESYNTH
static
#endif
void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch)
void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef,
celt_sig *mem, int accum)
{
int c;
int Nd;
int apply_downsampling=0;
opus_val16 coef0;
VARDECL(celt_sig, scratch);
SAVE_STACK;
#ifndef CUSTOM_MODES
/* Short version for common case. */
if (downsample == 1 && C == 2 && !accum)
{
deemphasis_stereo_simple(in, pcm, N, coef[0], mem);
return;
}
#endif
#ifndef FIXED_POINT
(void)accum;
celt_assert(accum==0);
#endif
ALLOC(scratch, N, celt_sig);
coef0 = coef[0];
Nd = N/downsample;
c=0; do {
@ -227,18 +269,31 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
/* Shortcut for the standard (non-custom modes) case */
for (j=0;j<N;j++)
{
celt_sig tmp = x[j] + m + VERY_SMALL;
celt_sig tmp = x[j] + VERY_SMALL + m;
m = MULT16_32_Q15(coef0, tmp);
scratch[j] = tmp;
}
apply_downsampling=1;
} else {
/* Shortcut for the standard (non-custom modes) case */
for (j=0;j<N;j++)
#ifdef FIXED_POINT
if (accum)
{
celt_sig tmp = x[j] + m + VERY_SMALL;
m = MULT16_32_Q15(coef0, tmp);
y[j*C] = SCALEOUT(SIG2WORD16(tmp));
for (j=0;j<N;j++)
{
celt_sig tmp = x[j] + m + VERY_SMALL;
m = MULT16_32_Q15(coef0, tmp);
y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(tmp))));
}
} else
#endif
{
for (j=0;j<N;j++)
{
celt_sig tmp = x[j] + VERY_SMALL + m;
m = MULT16_32_Q15(coef0, tmp);
y[j*C] = SCALEOUT(SIG2WORD16(tmp));
}
}
}
mem[c] = m;
@ -246,41 +301,101 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
if (apply_downsampling)
{
/* Perform down-sampling */
for (j=0;j<Nd;j++)
y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample]));
#ifdef FIXED_POINT
if (accum)
{
for (j=0;j<Nd;j++)
y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(scratch[j*downsample]))));
} else
#endif
{
for (j=0;j<Nd;j++)
y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample]));
}
}
} while (++c<C);
RESTORE_STACK;
}
/** Compute the IMDCT and apply window for all sub-frames and
all channels in a frame */
#ifndef RESYNTH
static
#endif
void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X,
celt_sig * OPUS_RESTRICT out_mem[], int C, int LM)
void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
opus_val16 *oldBandE, int start, int effEnd, int C, int CC,
int isTransient, int LM, int downsample,
int silence, int arch)
{
int b, c;
int c, i;
int M;
int b;
int B;
int N;
int N, NB;
int shift;
const int overlap = OVERLAP(mode);
int nbEBands;
int overlap;
VARDECL(celt_sig, freq);
SAVE_STACK;
if (shortBlocks)
overlap = mode->overlap;
nbEBands = mode->nbEBands;
N = mode->shortMdctSize<<LM;
ALLOC(freq, N, celt_sig); /**< Interleaved signal MDCTs */
M = 1<<LM;
if (isTransient)
{
B = shortBlocks;
N = mode->shortMdctSize;
B = M;
NB = mode->shortMdctSize;
shift = mode->maxLM;
} else {
B = 1;
N = mode->shortMdctSize<<LM;
NB = mode->shortMdctSize<<LM;
shift = mode->maxLM-LM;
}
c=0; do {
/* IMDCT on the interleaved the sub-frames, overlap-add is performed by the IMDCT */
if (CC==2&&C==1)
{
/* Copying a mono streams to two channels */
celt_sig *freq2;
denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M,
downsample, silence);
/* Store a temporary copy in the output buffer because the IMDCT destroys its input. */
freq2 = out_syn[1]+overlap/2;
OPUS_COPY(freq2, freq, N);
for (b=0;b<B;b++)
clt_mdct_backward(&mode->mdct, &X[b+c*N*B], out_mem[c]+N*b, mode->window, overlap, shift, B);
} while (++c<C);
clt_mdct_backward(&mode->mdct, &freq2[b], out_syn[0]+NB*b, mode->window, overlap, shift, B, arch);
for (b=0;b<B;b++)
clt_mdct_backward(&mode->mdct, &freq[b], out_syn[1]+NB*b, mode->window, overlap, shift, B, arch);
} else if (CC==1&&C==2)
{
/* Downmixing a stereo stream to mono */
celt_sig *freq2;
freq2 = out_syn[0]+overlap/2;
denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M,
downsample, silence);
/* Use the output buffer as temp array before downmixing. */
denormalise_bands(mode, X+N, freq2, oldBandE+nbEBands, start, effEnd, M,
downsample, silence);
for (i=0;i<N;i++)
freq[i] = ADD32(HALF32(freq[i]), HALF32(freq2[i]));
for (b=0;b<B;b++)
clt_mdct_backward(&mode->mdct, &freq[b], out_syn[0]+NB*b, mode->window, overlap, shift, B, arch);
} else {
/* Normal case (mono or stereo) */
c=0; do {
denormalise_bands(mode, X+c*N, freq, oldBandE+c*nbEBands, start, effEnd, M,
downsample, silence);
for (b=0;b<B;b++)
clt_mdct_backward(&mode->mdct, &freq[b], out_syn[c]+NB*b, mode->window, overlap, shift, B, arch);
} while (++c<CC);
}
/* Saturate IMDCT output so that we can't overflow in the pitch postfilter
or in the */
c=0; do {
for (i=0;i<N;i++)
out_syn[c][i] = SATURATE(out_syn[c][i], SIG_SAT);
} while (++c<CC);
RESTORE_STACK;
}
static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, ec_dec *dec)
@ -330,7 +445,23 @@ static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM,
pitch of 480 Hz. */
#define PLC_PITCH_LAG_MIN (100)
static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_RESTRICT pcm, int N, int LM)
static int celt_plc_pitch_search(celt_sig *decode_mem[2], int C, int arch)
{
int pitch_index;
VARDECL( opus_val16, lp_pitch_buf );
SAVE_STACK;
ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 );
pitch_downsample(decode_mem, lp_pitch_buf,
DECODE_BUFFER_SIZE, C, arch);
pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf,
DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX,
PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, arch);
pitch_index = PLC_PITCH_LAG_MAX-pitch_index;
RESTORE_STACK;
return pitch_index;
}
static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
{
int c;
int i;
@ -343,11 +474,9 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
int nbEBands;
int overlap;
int start;
int downsample;
int loss_count;
int noise_based;
const opus_int16 *eBands;
VARDECL(celt_sig, scratch);
SAVE_STACK;
mode = st->mode;
@ -367,40 +496,37 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
loss_count = st->loss_count;
start = st->start;
downsample = st->downsample;
noise_based = loss_count >= 5 || start != 0;
ALLOC(scratch, noise_based?N*C:N, celt_sig);
noise_based = loss_count >= 5 || start != 0 || st->skip_plc;
if (noise_based)
{
/* Noise-based PLC/CNG */
celt_sig *freq;
#ifdef NORM_ALIASING_HACK
celt_norm *X;
#else
VARDECL(celt_norm, X);
#endif
opus_uint32 seed;
opus_val16 *plcLogE;
int end;
int effEnd;
opus_val16 decay;
end = st->end;
effEnd = IMAX(start, IMIN(end, mode->effEBands));
/* Share the interleaved signal MDCT coefficient buffer with the
deemphasis scratch buffer. */
freq = scratch;
#ifdef NORM_ALIASING_HACK
/* This is an ugly hack that breaks aliasing rules and would be easily broken,
but it saves almost 4kB of stack. */
X = (celt_norm*)(out_syn[C-1]+overlap/2);
#else
ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */
#endif
if (loss_count >= 5)
plcLogE = backgroundLogE;
else {
/* Energy decay */
opus_val16 decay = loss_count==0 ?
QCONST16(1.5f, DB_SHIFT) : QCONST16(.5f, DB_SHIFT);
c=0; do
{
for (i=start;i<end;i++)
oldBandE[c*nbEBands+i] -= decay;
} while (++c<C);
plcLogE = oldBandE;
}
/* Energy decay */
decay = loss_count==0 ? QCONST16(1.5f, DB_SHIFT) : QCONST16(.5f, DB_SHIFT);
c=0; do
{
for (i=start;i<end;i++)
oldBandE[c*nbEBands+i] = MAX16(backgroundLogE[c*nbEBands+i], oldBandE[c*nbEBands+i] - decay);
} while (++c<C);
seed = st->rng;
for (c=0;c<C;c++)
{
@ -416,25 +542,17 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
seed = celt_lcg_rand(seed);
X[boffs+j] = (celt_norm)((opus_int32)seed>>20);
}
renormalise_vector(X+boffs, blen, Q15ONE);
renormalise_vector(X+boffs, blen, Q15ONE, st->arch);
}
}
st->rng = seed;
denormalise_bands(mode, X, freq, plcLogE, start, effEnd, C, 1<<LM);
c=0; do {
int bound = eBands[effEnd]<<LM;
if (downsample!=1)
bound = IMIN(bound, N/downsample);
for (i=bound;i<N;i++)
freq[c*N+i] = 0;
} while (++c<C);
c=0; do {
OPUS_MOVE(decode_mem[c], decode_mem[c]+N,
DECODE_BUFFER_SIZE-N+(overlap>>1));
} while (++c<C);
compute_inv_mdcts(mode, 0, freq, out_syn, C, LM);
celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd, C, C, 0, LM, st->downsample, 0, st->arch);
} else {
/* Pitch-based PLC */
const opus_val16 *window;
@ -445,15 +563,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
if (loss_count == 0)
{
VARDECL( opus_val16, lp_pitch_buf );
ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 );
pitch_downsample(decode_mem, lp_pitch_buf,
DECODE_BUFFER_SIZE, C, st->arch);
pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf,
DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX,
PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, st->arch);
pitch_index = PLC_PITCH_LAG_MAX-pitch_index;
st->last_pitch_index = pitch_index;
st->last_pitch_index = pitch_index = celt_plc_pitch_search(decode_mem, C, st->arch);
} else {
pitch_index = st->last_pitch_index;
fade = QCONST16(.8f,15);
@ -501,6 +611,23 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
#endif
}
_celt_lpc(lpc+c*LPC_ORDER, ac, LPC_ORDER);
#ifdef FIXED_POINT
/* For fixed-point, apply bandwidth expansion until we can guarantee that
no overflow can happen in the IIR filter. This means:
32768*sum(abs(filter)) < 2^31 */
while (1) {
opus_val16 tmp=Q15ONE;
opus_val32 sum=QCONST16(1., SIG_SHIFT);
for (i=0;i<LPC_ORDER;i++)
sum += ABS16(lpc[c*LPC_ORDER+i]);
if (sum < 65535) break;
for (i=0;i<LPC_ORDER;i++)
{
tmp = MULT16_16_Q15(QCONST16(.99f,15), tmp);
lpc[c*LPC_ORDER+i] = MULT16_16_Q15(lpc[c*LPC_ORDER+i], tmp);
}
}
#endif
}
/* We want the excitation for 2 pitch periods in order to look for a
decaying signal, but we can't get more than MAX_PERIOD. */
@ -516,7 +643,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
}
/* Compute the excitation for exc_length samples before the loss. */
celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER,
exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem);
exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem, st->arch);
}
/* Check if the waveform is decaying, and if so how fast.
@ -570,9 +697,8 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
tmp = ROUND16(
buf[DECODE_BUFFER_SIZE-MAX_PERIOD-N+extrapolation_offset+j],
SIG_SHIFT);
S1 += SHR32(MULT16_16(tmp, tmp), 8);
S1 += SHR32(MULT16_16(tmp, tmp), 10);
}
{
opus_val16 lpc_mem[LPC_ORDER];
/* Copy the last decoded samples (prior to the overlap region) to
@ -583,7 +709,11 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
the signal domain. */
celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*LPC_ORDER,
buf+DECODE_BUFFER_SIZE-N, extrapolation_len, LPC_ORDER,
lpc_mem);
lpc_mem, st->arch);
#ifdef FIXED_POINT
for (i=0; i < extrapolation_len; i++)
buf[DECODE_BUFFER_SIZE-N+i] = SATURATE(buf[DECODE_BUFFER_SIZE-N+i], SIG_SAT);
#endif
}
/* Check if the synthesis energy is higher than expected, which can
@ -594,7 +724,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
for (i=0;i<extrapolation_len;i++)
{
opus_val16 tmp = ROUND16(buf[DECODE_BUFFER_SIZE-N+i], SIG_SHIFT);
S2 += SHR32(MULT16_16(tmp, tmp), 8);
S2 += SHR32(MULT16_16(tmp, tmp), 10);
}
/* This checks for an "explosion" in the synthesis. */
#ifdef FIXED_POINT
@ -631,7 +761,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
comb_filter(etmp, buf+DECODE_BUFFER_SIZE,
st->postfilter_period, st->postfilter_period, overlap,
-st->postfilter_gain, -st->postfilter_gain,
st->postfilter_tapset, st->postfilter_tapset, NULL, 0);
st->postfilter_tapset, st->postfilter_tapset, NULL, 0, st->arch);
/* Simulate TDAC on the concealed audio so that it blends with the
MDCT of the next frame. */
@ -644,22 +774,23 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
} while (++c<C);
}
deemphasis(out_syn, pcm, N, C, downsample,
mode->preemph, st->preemph_memD, scratch);
st->loss_count = loss_count+1;
RESTORE_STACK;
}
int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec)
int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data,
int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum)
{
int c, i, N;
int spread_decision;
opus_int32 bits;
ec_dec _dec;
VARDECL(celt_sig, freq);
#ifdef NORM_ALIASING_HACK
celt_norm *X;
#else
VARDECL(celt_norm, X);
#endif
VARDECL(int, fine_quant);
VARDECL(int, pulses);
VARDECL(int, cap);
@ -677,6 +808,8 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
int intra_ener;
const int CC = st->channels;
int LM, M;
int start;
int end;
int effEnd;
int codedBands;
int alloc_trim;
@ -703,11 +836,10 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
nbEBands = mode->nbEBands;
overlap = mode->overlap;
eBands = mode->eBands;
start = st->start;
end = st->end;
frame_size *= st->downsample;
c=0; do {
decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap);
} while (++c<CC);
lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC);
oldBandE = lpc+CC*LPC_ORDER;
oldLogE = oldBandE + 2*nbEBands;
@ -725,7 +857,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
if (data0<0)
return OPUS_INVALID_PACKET;
}
st->end = IMAX(1, mode->effEBands-2*(data0>>5));
st->end = end = IMAX(1, mode->effEBands-2*(data0>>5));
LM = (data0>>3)&0x3;
C = 1 + ((data0>>2)&0x1);
data++;
@ -752,18 +884,27 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
return OPUS_BAD_ARG;
N = M*mode->shortMdctSize;
c=0; do {
decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap);
out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N;
} while (++c<CC);
effEnd = st->end;
effEnd = end;
if (effEnd > mode->effEBands)
effEnd = mode->effEBands;
if (data == NULL || len<=1)
{
celt_decode_lost(st, pcm, N, LM);
celt_decode_lost(st, N, LM);
deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum);
RESTORE_STACK;
return frame_size/st->downsample;
}
/* Check if there are at least two packets received consecutively before
* turning on the pitch-based PLC */
st->skip_plc = st->loss_count != 0;
if (dec == NULL)
{
ec_dec_init(&_dec,(unsigned char*)data,len);
@ -795,7 +936,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
postfilter_gain = 0;
postfilter_pitch = 0;
postfilter_tapset = 0;
if (st->start==0 && tell+16 <= total_bits)
if (start==0 && tell+16 <= total_bits)
{
if(ec_dec_bit_logp(dec, 1))
{
@ -826,11 +967,11 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
/* Decode the global flags (first symbols in the stream) */
intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0;
/* Get band energies */
unquant_coarse_energy(mode, st->start, st->end, oldBandE,
unquant_coarse_energy(mode, start, end, oldBandE,
intra_ener, dec, C, LM);
ALLOC(tf_res, nbEBands, int);
tf_decode(st->start, st->end, isTransient, tf_res, LM, dec);
tf_decode(start, end, isTransient, tf_res, LM, dec);
tell = ec_tell(dec);
spread_decision = SPREAD_NORMAL;
@ -846,7 +987,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
dynalloc_logp = 6;
total_bits<<=BITRES;
tell = ec_tell_frac(dec);
for (i=st->start;i<st->end;i++)
for (i=start;i<end;i++)
{
int width, quanta;
int dynalloc_loop_logp;
@ -885,84 +1026,63 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
ALLOC(pulses, nbEBands, int);
ALLOC(fine_priority, nbEBands, int);
codedBands = compute_allocation(mode, st->start, st->end, offsets, cap,
codedBands = compute_allocation(mode, start, end, offsets, cap,
alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses,
fine_quant, fine_priority, C, LM, dec, 0, 0, 0);
unquant_fine_energy(mode, st->start, st->end, oldBandE, fine_quant, dec, C);
unquant_fine_energy(mode, start, end, oldBandE, fine_quant, dec, C);
c=0; do {
OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2);
} while (++c<CC);
/* Decode fixed codebook */
ALLOC(collapse_masks, C*nbEBands, unsigned char);
ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */
quant_all_bands(0, mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks,
#ifdef NORM_ALIASING_HACK
/* This is an ugly hack that breaks aliasing rules and would be easily broken,
but it saves almost 4kB of stack. */
X = (celt_norm*)(out_syn[CC-1]+overlap/2);
#else
ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */
#endif
quant_all_bands(0, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks,
NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res,
len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng);
len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng, 0,
st->arch, st->disable_inv);
if (anti_collapse_rsv > 0)
{
anti_collapse_on = ec_dec_bits(dec, 1);
}
unquant_energy_finalise(mode, st->start, st->end, oldBandE,
unquant_energy_finalise(mode, start, end, oldBandE,
fine_quant, fine_priority, len*8-ec_tell(dec), dec, C);
if (anti_collapse_on)
anti_collapse(mode, X, collapse_masks, LM, C, N,
st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
ALLOC(freq, IMAX(CC,C)*N, celt_sig); /**< Interleaved signal MDCTs */
start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng, st->arch);
if (silence)
{
for (i=0;i<C*nbEBands;i++)
oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
for (i=0;i<C*N;i++)
freq[i] = 0;
} else {
/* Synthesis */
denormalise_bands(mode, X, freq, oldBandE, st->start, effEnd, C, M);
}
c=0; do {
OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2);
} while (++c<CC);
c=0; do {
int bound = M*eBands[effEnd];
if (st->downsample!=1)
bound = IMIN(bound, N/st->downsample);
for (i=bound;i<N;i++)
freq[c*N+i] = 0;
} while (++c<C);
c=0; do {
out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N;
} while (++c<CC);
if (CC==2&&C==1)
{
for (i=0;i<N;i++)
freq[N+i] = freq[i];
}
if (CC==1&&C==2)
{
for (i=0;i<N;i++)
freq[i] = HALF32(ADD32(freq[i],freq[N+i]));
}
/* Compute inverse MDCTs */
compute_inv_mdcts(mode, shortBlocks, freq, out_syn, CC, LM);
celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd,
C, CC, isTransient, LM, st->downsample, silence, st->arch);
c=0; do {
st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD);
st->postfilter_period_old=IMAX(st->postfilter_period_old, COMBFILTER_MINPERIOD);
comb_filter(out_syn[c], out_syn[c], st->postfilter_period_old, st->postfilter_period, mode->shortMdctSize,
st->postfilter_gain_old, st->postfilter_gain, st->postfilter_tapset_old, st->postfilter_tapset,
mode->window, overlap);
mode->window, overlap, st->arch);
if (LM!=0)
comb_filter(out_syn[c]+mode->shortMdctSize, out_syn[c]+mode->shortMdctSize, st->postfilter_period, postfilter_pitch, N-mode->shortMdctSize,
st->postfilter_gain, postfilter_gain, st->postfilter_tapset, postfilter_tapset,
mode->window, overlap);
mode->window, overlap, st->arch);
} while (++c<CC);
st->postfilter_period_old = st->postfilter_period;
@ -978,32 +1098,36 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
st->postfilter_tapset_old = st->postfilter_tapset;
}
if (C==1) {
for (i=0;i<nbEBands;i++)
oldBandE[nbEBands+i]=oldBandE[i];
}
if (C==1)
OPUS_COPY(&oldBandE[nbEBands], oldBandE, nbEBands);
/* In case start or end were to change */
if (!isTransient)
{
opus_val16 max_background_increase;
OPUS_COPY(oldLogE2, oldLogE, 2*nbEBands);
OPUS_COPY(oldLogE, oldBandE, 2*nbEBands);
/* In normal circumstances, we only allow the noise floor to increase by
up to 2.4 dB/second, but when we're in DTX, we allow up to 6 dB
increase for each update.*/
if (st->loss_count < 10)
max_background_increase = M*QCONST16(0.001f,DB_SHIFT);
else
max_background_increase = QCONST16(1.f,DB_SHIFT);
for (i=0;i<2*nbEBands;i++)
oldLogE2[i] = oldLogE[i];
for (i=0;i<2*nbEBands;i++)
oldLogE[i] = oldBandE[i];
for (i=0;i<2*nbEBands;i++)
backgroundLogE[i] = MIN16(backgroundLogE[i] + M*QCONST16(0.001f,DB_SHIFT), oldBandE[i]);
backgroundLogE[i] = MIN16(backgroundLogE[i] + max_background_increase, oldBandE[i]);
} else {
for (i=0;i<2*nbEBands;i++)
oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]);
}
c=0; do
{
for (i=0;i<st->start;i++)
for (i=0;i<start;i++)
{
oldBandE[c*nbEBands+i]=0;
oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
}
for (i=st->end;i<nbEBands;i++)
for (i=end;i<nbEBands;i++)
{
oldBandE[c*nbEBands+i]=0;
oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
@ -1011,8 +1135,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
} while (++c<2);
st->rng = dec->rng;
/* We reuse freq[] as scratch space for the de-emphasis */
deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, freq);
deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum);
st->loss_count = 0;
RESTORE_STACK;
if (ec_tell(dec) > 8*len)
@ -1028,7 +1151,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
#ifdef FIXED_POINT
int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
{
return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL);
return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0);
}
#ifndef DISABLE_FLOAT_API
@ -1045,7 +1168,7 @@ int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char
N = frame_size;
ALLOC(out, C*N, opus_int16);
ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL);
ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0);
if (ret>0)
for (j=0;j<C*ret;j++)
pcm[j]=out[j]*(1.f/32768.f);
@ -1059,7 +1182,7 @@ int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char
int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size)
{
return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL);
return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0);
}
int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
@ -1075,7 +1198,7 @@ int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data
N = frame_size;
ALLOC(out, C*N, celt_sig);
ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL);
ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0);
if (ret>0)
for (j=0;j<C*ret;j++)
@ -1149,6 +1272,7 @@ int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...)
((char*)&st->DECODER_RESET_START - (char*)st));
for (i=0;i<2*st->mode->nbEBands;i++)
oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT);
st->skip_plc = 1;
}
break;
case OPUS_GET_PITCH_REQUEST:
@ -1181,6 +1305,26 @@ int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...)
*value=st->rng;
}
break;
case OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST:
{
opus_int32 value = va_arg(ap, opus_int32);
if(value<0 || value>1)
{
goto bad_arg;
}
st->disable_inv = value;
}
break;
case OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST:
{
opus_int32 *value = va_arg(ap, opus_int32*);
if (!value)
{
goto bad_arg;
}
*value = st->disable_inv;
}
break;
default:
goto bad_request;
}

File diff suppressed because it is too large Load Diff

View File

@ -49,8 +49,7 @@ int p
float *lpc = _lpc;
#endif
for (i = 0; i < p; i++)
lpc[i] = 0;
OPUS_CLEAR(lpc, p);
if (ac[0] != 0)
{
for (i = 0; i < p; i++) {
@ -88,12 +87,15 @@ int p
#endif
}
void celt_fir(const opus_val16 *_x,
void celt_fir_c(
const opus_val16 *_x,
const opus_val16 *num,
opus_val16 *_y,
int N,
int ord,
opus_val16 *mem)
opus_val16 *mem,
int arch)
{
int i,j;
VARDECL(opus_val16, rnum);
@ -111,6 +113,7 @@ void celt_fir(const opus_val16 *_x,
for(i=0;i<ord;i++)
mem[i] = _x[N-i-1];
#ifdef SMALL_FOOTPRINT
(void)arch;
for (i=0;i<N;i++)
{
opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT);
@ -124,7 +127,7 @@ void celt_fir(const opus_val16 *_x,
for (i=0;i<N-3;i+=4)
{
opus_val32 sum[4]={0,0,0,0};
xcorr_kernel(rnum, x+i, sum, ord);
xcorr_kernel(rnum, x+i, sum, ord, arch);
_y[i ] = SATURATE16(ADD32(EXTEND32(_x[i ]), PSHR32(sum[0], SIG_SHIFT)));
_y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT)));
_y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT)));
@ -146,10 +149,12 @@ void celt_iir(const opus_val32 *_x,
opus_val32 *_y,
int N,
int ord,
opus_val16 *mem)
opus_val16 *mem,
int arch)
{
#ifdef SMALL_FOOTPRINT
int i,j;
(void)arch;
for (i=0;i<N;i++)
{
opus_val32 sum = _x[i];
@ -161,7 +166,7 @@ void celt_iir(const opus_val32 *_x,
{
mem[j]=mem[j-1];
}
mem[0] = ROUND16(sum,SIG_SHIFT);
mem[0] = SROUND16(sum, SIG_SHIFT);
_y[i] = sum;
}
#else
@ -187,23 +192,23 @@ void celt_iir(const opus_val32 *_x,
sum[1]=_x[i+1];
sum[2]=_x[i+2];
sum[3]=_x[i+3];
xcorr_kernel(rden, y+i, sum, ord);
xcorr_kernel(rden, y+i, sum, ord, arch);
/* Patch up the result to compensate for the fact that this is an IIR */
y[i+ord ] = -ROUND16(sum[0],SIG_SHIFT);
y[i+ord ] = -SROUND16(sum[0],SIG_SHIFT);
_y[i ] = sum[0];
sum[1] = MAC16_16(sum[1], y[i+ord ], den[0]);
y[i+ord+1] = -ROUND16(sum[1],SIG_SHIFT);
y[i+ord+1] = -SROUND16(sum[1],SIG_SHIFT);
_y[i+1] = sum[1];
sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
sum[2] = MAC16_16(sum[2], y[i+ord ], den[1]);
y[i+ord+2] = -ROUND16(sum[2],SIG_SHIFT);
y[i+ord+2] = -SROUND16(sum[2],SIG_SHIFT);
_y[i+2] = sum[2];
sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
sum[3] = MAC16_16(sum[3], y[i+ord ], den[2]);
y[i+ord+3] = -ROUND16(sum[3],SIG_SHIFT);
y[i+ord+3] = -SROUND16(sum[3],SIG_SHIFT);
_y[i+3] = sum[3];
}
for (;i<N;i++)
@ -211,7 +216,7 @@ void celt_iir(const opus_val32 *_x,
opus_val32 sum = _x[i];
for (j=0;j<ord;j++)
sum -= MULT16_16(rden[j],y[i+j]);
y[i+ord] = ROUND16(sum,SIG_SHIFT);
y[i+ord] = SROUND16(sum,SIG_SHIFT);
_y[i] = sum;
}
for(i=0;i<ord;i++)

View File

@ -29,24 +29,37 @@
#define PLC_H
#include "arch.h"
#include "cpu_support.h"
#if defined(OPUS_X86_MAY_HAVE_SSE4_1)
#include "x86/celt_lpc_sse.h"
#endif
#define LPC_ORDER 24
void _celt_lpc(opus_val16 *_lpc, const opus_val32 *ac, int p);
void celt_fir(const opus_val16 *x,
void celt_fir_c(
const opus_val16 *x,
const opus_val16 *num,
opus_val16 *y,
int N,
int ord,
opus_val16 *mem);
opus_val16 *mem,
int arch);
#if !defined(OVERRIDE_CELT_FIR)
#define celt_fir(x, num, y, N, ord, mem, arch) \
(celt_fir_c(x, num, y, N, ord, mem, arch))
#endif
void celt_iir(const opus_val32 *x,
const opus_val16 *den,
opus_val32 *y,
int N,
int ord,
opus_val16 *mem);
opus_val16 *mem,
int arch);
int _celt_autocorr(const opus_val16 *x, opus_val32 *ac,
const opus_val16 *window, int overlap, int lag, int n, int arch);

View File

@ -31,7 +31,8 @@
#include "opus_types.h"
#include "opus_defines.h"
#if defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_ASM)
#if defined(OPUS_HAVE_RTCD) && \
(defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
#include "arm/armcpu.h"
/* We currently support 4 ARM variants:
@ -42,6 +43,22 @@
*/
#define OPUS_ARCHMASK 3
#elif (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
(defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
(defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))
#include "x86/x86cpu.h"
/* We currently support 5 x86 variants:
* arch[0] -> non-sse
* arch[1] -> sse
* arch[2] -> sse2
* arch[3] -> sse4.1
* arch[4] -> avx
*/
#define OPUS_ARCHMASK 7
int opus_select_arch(void);
#else
#define OPUS_ARCHMASK 0
@ -50,5 +67,4 @@ static OPUS_INLINE int opus_select_arch(void)
return 0;
}
#endif
#endif

View File

@ -74,7 +74,7 @@ int log2_frac(opus_uint32 val, int frac)
/*Although derived separately, the pulse vector coding scheme is equivalent to
a Pyramid Vector Quantizer \cite{Fis86}.
Some additional notes about an early version appear at
http://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering
https://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering
and the definitions of some terms have evolved since that was written.
The conversion from a pulse vector to an integer index (encoding) and back
@ -460,10 +460,12 @@ void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
ec_enc_uint(_enc,icwrs(_n,_y),CELT_PVQ_V(_n,_k));
}
static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
opus_uint32 p;
int s;
int k0;
opus_int16 val;
opus_val32 yy=0;
celt_assert(_k>0);
celt_assert(_n>1);
while(_n>2){
@ -487,7 +489,9 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
}
else for(p=row[_k];p>_i;p=row[_k])_k--;
_i-=p;
*_y++=(k0-_k+s)^s;
val=(k0-_k+s)^s;
*_y++=val;
yy=MAC16_16(yy,val,val);
}
/*Lots of dimensions case:*/
else{
@ -507,7 +511,9 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
do p=CELT_PVQ_U_ROW[--_k][_n];
while(p>_i);
_i-=p;
*_y++=(k0-_k+s)^s;
val=(k0-_k+s)^s;
*_y++=val;
yy=MAC16_16(yy,val,val);
}
}
_n--;
@ -519,14 +525,19 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
k0=_k;
_k=(_i+1)>>1;
if(_k)_i-=2*_k-1;
*_y++=(k0-_k+s)^s;
val=(k0-_k+s)^s;
*_y++=val;
yy=MAC16_16(yy,val,val);
/*_n==1*/
s=-(int)_i;
*_y=(_k+s)^s;
val=(_k+s)^s;
*_y=val;
yy=MAC16_16(yy,val,val);
return yy;
}
void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y);
opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
return cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y);
}
#else /* SMALL_FOOTPRINT */
@ -591,8 +602,10 @@ static opus_uint32 ncwrs_urow(unsigned _n,unsigned _k,opus_uint32 *_u){
_y: Returns the vector of pulses.
_u: Must contain entries [0..._k+1] of row _n of U() on input.
Its contents will be destructively modified.*/
static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){
static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){
int j;
opus_int16 val;
opus_val32 yy=0;
celt_assert(_n>0);
j=0;
do{
@ -607,10 +620,13 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){
while(p>_i)p=_u[--_k];
_i-=p;
yj-=_k;
_y[j]=(yj+s)^s;
val=(yj+s)^s;
_y[j]=val;
yy=MAC16_16(yy,val,val);
uprev(_u,_k+2,0);
}
while(++j<_n);
return yy;
}
/*Returns the index of the given combination of K elements chosen from a set
@ -685,13 +701,15 @@ void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
RESTORE_STACK;
}
void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
VARDECL(opus_uint32,u);
int ret;
SAVE_STACK;
celt_assert(_k>0);
ALLOC(u,_k+2U,opus_uint32);
cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u);
ret = cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u);
RESTORE_STACK;
return ret;
}
#endif /* SMALL_FOOTPRINT */

View File

@ -43,6 +43,6 @@ void get_required_bits(opus_int16 *bits, int N, int K, int frac);
void encode_pulses(const int *_y, int N, int K, ec_enc *enc);
void decode_pulses(int *_y, int N, int K, ec_dec *dec);
opus_val32 decode_pulses(int *_y, int N, int K, ec_dec *dec);
#endif /* CWRS_H */

View File

@ -62,6 +62,27 @@ int ec_ilog(opus_uint32 _v){
}
#endif
#if 1
/* This is a faster version of ec_tell_frac() that takes advantage
of the low (1/8 bit) resolution to use just a linear function
followed by a lookup to determine the exact transition thresholds. */
opus_uint32 ec_tell_frac(ec_ctx *_this){
static const unsigned correction[8] =
{35733, 38967, 42495, 46340,
50535, 55109, 60097, 65535};
opus_uint32 nbits;
opus_uint32 r;
int l;
unsigned b;
nbits=_this->nbits_total<<BITRES;
l=EC_ILOG(_this->rng);
r=_this->rng>>(l-16);
b = (r>>12)-8;
b += r>correction[b];
l = (l<<3)+b;
return nbits-l;
}
#else
opus_uint32 ec_tell_frac(ec_ctx *_this){
opus_uint32 nbits;
opus_uint32 r;
@ -91,3 +112,42 @@ opus_uint32 ec_tell_frac(ec_ctx *_this){
}
return nbits-l;
}
#endif
#ifdef USE_SMALL_DIV_TABLE
/* Result of 2^32/(2*i+1), except for i=0. */
const opus_uint32 SMALL_DIV_TABLE[129] = {
0xFFFFFFFF, 0x55555555, 0x33333333, 0x24924924,
0x1C71C71C, 0x1745D174, 0x13B13B13, 0x11111111,
0x0F0F0F0F, 0x0D79435E, 0x0C30C30C, 0x0B21642C,
0x0A3D70A3, 0x097B425E, 0x08D3DCB0, 0x08421084,
0x07C1F07C, 0x07507507, 0x06EB3E45, 0x06906906,
0x063E7063, 0x05F417D0, 0x05B05B05, 0x0572620A,
0x05397829, 0x05050505, 0x04D4873E, 0x04A7904A,
0x047DC11F, 0x0456C797, 0x04325C53, 0x04104104,
0x03F03F03, 0x03D22635, 0x03B5CC0E, 0x039B0AD1,
0x0381C0E0, 0x0369D036, 0x03531DEC, 0x033D91D2,
0x0329161F, 0x03159721, 0x03030303, 0x02F14990,
0x02E05C0B, 0x02D02D02, 0x02C0B02C, 0x02B1DA46,
0x02A3A0FD, 0x0295FAD4, 0x0288DF0C, 0x027C4597,
0x02702702, 0x02647C69, 0x02593F69, 0x024E6A17,
0x0243F6F0, 0x0239E0D5, 0x02302302, 0x0226B902,
0x021D9EAD, 0x0214D021, 0x020C49BA, 0x02040810,
0x01FC07F0, 0x01F44659, 0x01ECC07B, 0x01E573AC,
0x01DE5D6E, 0x01D77B65, 0x01D0CB58, 0x01CA4B30,
0x01C3F8F0, 0x01BDD2B8, 0x01B7D6C3, 0x01B20364,
0x01AC5701, 0x01A6D01A, 0x01A16D3F, 0x019C2D14,
0x01970E4F, 0x01920FB4, 0x018D3018, 0x01886E5F,
0x0183C977, 0x017F405F, 0x017AD220, 0x01767DCE,
0x01724287, 0x016E1F76, 0x016A13CD, 0x01661EC6,
0x01623FA7, 0x015E75BB, 0x015AC056, 0x01571ED3,
0x01539094, 0x01501501, 0x014CAB88, 0x0149539E,
0x01460CBC, 0x0142D662, 0x013FB013, 0x013C995A,
0x013991C2, 0x013698DF, 0x0133AE45, 0x0130D190,
0x012E025C, 0x012B404A, 0x01288B01, 0x0125E227,
0x01234567, 0x0120B470, 0x011E2EF3, 0x011BB4A4,
0x01194538, 0x0116E068, 0x011485F0, 0x0112358E,
0x010FEF01, 0x010DB20A, 0x010B7E6E, 0x010953F3,
0x01073260, 0x0105197F, 0x0103091B, 0x01010101
};
#endif

View File

@ -34,6 +34,12 @@
# include <stddef.h>
# include "ecintrin.h"
extern const opus_uint32 SMALL_DIV_TABLE[129];
#ifdef OPUS_ARM_ASM
#define USE_SMALL_DIV_TABLE
#endif
/*OPT: ec_window must be at least 32 bits, but if you have fast arithmetic on a
larger type, you can speed up the decoder by using it here.*/
typedef opus_uint32 ec_window;
@ -114,4 +120,33 @@ static OPUS_INLINE int ec_tell(ec_ctx *_this){
rounding error is in the positive direction).*/
opus_uint32 ec_tell_frac(ec_ctx *_this);
/* Tested exhaustively for all n and for 1<=d<=256 */
static OPUS_INLINE opus_uint32 celt_udiv(opus_uint32 n, opus_uint32 d) {
celt_assert(d>0);
#ifdef USE_SMALL_DIV_TABLE
if (d>256)
return n/d;
else {
opus_uint32 t, q;
t = EC_ILOG(d&-d);
q = (opus_uint64)SMALL_DIV_TABLE[d>>t]*(n>>(t-1))>>32;
return q+(n-q*d >= d);
}
#else
return n/d;
#endif
}
static OPUS_INLINE opus_int32 celt_sudiv(opus_int32 n, opus_int32 d) {
celt_assert(d>0);
#ifdef USE_SMALL_DIV_TABLE
if (n<0)
return -(opus_int32)celt_udiv(-n, d);
else
return celt_udiv(n, d);
#else
return n/d;
#endif
}
#endif

View File

@ -138,7 +138,7 @@ void ec_dec_init(ec_dec *_this,unsigned char *_buf,opus_uint32 _storage){
unsigned ec_decode(ec_dec *_this,unsigned _ft){
unsigned s;
_this->ext=_this->rng/_ft;
_this->ext=celt_udiv(_this->rng,_ft);
s=(unsigned)(_this->val/_this->ext);
return _ft-EC_MINI(s+1,_ft);
}

View File

@ -98,7 +98,7 @@ static void ec_enc_carry_out(ec_enc *_this,int _c){
else _this->ext++;
}
static void ec_enc_normalize(ec_enc *_this){
static OPUS_INLINE void ec_enc_normalize(ec_enc *_this){
/*If the range is too small, output some bits and rescale it.*/
while(_this->rng<=EC_CODE_BOT){
ec_enc_carry_out(_this,(int)(_this->val>>EC_CODE_SHIFT));
@ -127,7 +127,7 @@ void ec_enc_init(ec_enc *_this,unsigned char *_buf,opus_uint32 _size){
void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft){
opus_uint32 r;
r=_this->rng/_ft;
r=celt_udiv(_this->rng,_ft);
if(_fl>0){
_this->val+=_this->rng-IMUL32(r,(_ft-_fl));
_this->rng=IMUL32(r,(_fh-_fl));

View File

@ -59,6 +59,12 @@ extern opus_int64 celt_mips;
#define SHR(a,b) SHR32(a,b)
#define PSHR(a,b) PSHR32(a,b)
/** Add two 32-bit values, ignore any overflows */
#define ADD32_ovflw(a,b) (celt_mips+=2,(opus_val32)((opus_uint32)(a)+(opus_uint32)(b)))
/** Subtract two 32-bit values, ignore any overflows */
#define SUB32_ovflw(a,b) (celt_mips+=2,(opus_val32)((opus_uint32)(a)-(opus_uint32)(b)))
#define NEG32_ovflw(a) (celt_mips+=2,(opus_val32)(-(opus_uint32)(a)))
static OPUS_INLINE short NEG16(int x)
{
int res;
@ -227,6 +233,8 @@ static OPUS_INLINE int SHL32_(opus_int64 a, int shift, char *file, int line)
#define VSHR32(a, shift) (((shift)>0) ? SHR32(a, shift) : SHL32(a, -(shift)))
#define ROUND16(x,a) (celt_mips--,EXTRACT16(PSHR32((x),(a))))
#define SROUND16(x,a) (celt_mips--,EXTRACT16(SATURATE(PSHR32(x,a), 32767)));
#define HALF16(x) (SHR16(x,1))
#define HALF32(x) (SHR32(x,1))
@ -496,6 +504,7 @@ static OPUS_INLINE int MULT16_32_PX_(int a, opus_int64 b, int Q, char *file, int
#define MULT16_32_Q15(a,b) MULT16_32_QX(a,b,15)
#define MAC16_32_Q15(c,a,b) (celt_mips-=2,ADD32((c),MULT16_32_Q15((a),(b))))
#define MAC16_32_Q16(c,a,b) (celt_mips-=2,ADD32((c),MULT16_32_Q16((a),(b))))
static OPUS_INLINE int SATURATE(int a, int b)
{
@ -767,6 +776,16 @@ static OPUS_INLINE int DIV32_(opus_int64 a, opus_int64 b, char *file, int line)
return res;
}
static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x)
{
x = PSHR32(x, SIG_SHIFT);
x = MAX32(x, -32768);
x = MIN32(x, 32767);
return EXTRACT16(x);
}
#define SIG2WORD16(x) (SIG2WORD16_generic(x))
#undef PRINT_MIPS
#define PRINT_MIPS(file) do {fprintf (file, "total complexity = %llu MIPS\n", celt_mips);} while (0);

View File

@ -37,16 +37,32 @@
#define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b))
/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
#if OPUS_FAST_INT64
#define MULT16_32_Q16(a,b) ((opus_val32)SHR((opus_int64)((opus_val16)(a))*(b),16))
#else
#define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16))
#endif
/** 16x32 multiplication, followed by a 16-bit shift right (round-to-nearest). Results fits in 32 bits */
#if OPUS_FAST_INT64
#define MULT16_32_P16(a,b) ((opus_val32)PSHR((opus_int64)((opus_val16)(a))*(b),16))
#else
#define MULT16_32_P16(a,b) ADD32(MULT16_16((a),SHR((b),16)), PSHR(MULT16_16SU((a),((b)&0x0000ffff)),16))
#endif
/** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */
#if OPUS_FAST_INT64
#define MULT16_32_Q15(a,b) ((opus_val32)SHR((opus_int64)((opus_val16)(a))*(b),15))
#else
#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
#endif
/** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */
#if OPUS_FAST_INT64
#define MULT32_32_Q31(a,b) ((opus_val32)SHR((opus_int64)(a)*(opus_int64)(b),31))
#else
#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
#endif
/** Compile-time conversion of float constant to 16-bit value */
#define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits))))
@ -88,6 +104,9 @@
/** Shift by a and round-to-neareast 32-bit value. Result is a 16-bit value */
#define ROUND16(x,a) (EXTRACT16(PSHR32((x),(a))))
/** Shift by a and round-to-neareast 32-bit value. Result is a saturated 16-bit value */
#define SROUND16(x,a) EXTRACT16(SATURATE(PSHR32(x,a), 32767));
/** Divide by two */
#define HALF16(x) (SHR16(x,1))
#define HALF32(x) (SHR32(x,1))
@ -101,6 +120,12 @@
/** Subtract two 32-bit values */
#define SUB32(a,b) ((opus_val32)(a)-(opus_val32)(b))
/** Add two 32-bit values, ignore any overflows */
#define ADD32_ovflw(a,b) ((opus_val32)((opus_uint32)(a)+(opus_uint32)(b)))
/** Subtract two 32-bit values, ignore any overflows */
#define SUB32_ovflw(a,b) ((opus_val32)((opus_uint32)(a)-(opus_uint32)(b)))
#define NEG32_ovflw(a) ((opus_val32)(-(opus_uint32)(a)))
/** 16x16 multiplication where the result fits in 16 bits */
#define MULT16_16_16(a,b) ((((opus_val16)(a))*((opus_val16)(b))))
@ -113,7 +138,11 @@
/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add.
b must fit in 31 bits.
Result fits in 32 bits. */
#define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)))
#define MAC16_32_Q15(c,a,b) ADD32((c),ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)))
/** 16x32 multiplication, followed by a 16-bit shift right and 32-bit add.
Results fits in 32 bits */
#define MAC16_32_Q16(c,a,b) ADD32((c),ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16)))
#define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11))
#define MULT16_16_Q11(a,b) (SHR(MULT16_16((a),(b)),11))
@ -131,4 +160,17 @@
/** Divide a 32-bit value by a 32-bit value. Result fits in 32 bits */
#define DIV32(a,b) (((opus_val32)(a))/((opus_val32)(b)))
#if defined(MIPSr1_ASM)
#include "mips/fixed_generic_mipsr1.h"
#endif
static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x)
{
x = PSHR32(x, SIG_SHIFT);
x = MAX32(x, -32768);
x = MIN32(x, 32767);
return EXTRACT16(x);
}
#define SIG2WORD16(x) (SIG2WORD16_generic(x))
#endif

View File

@ -61,7 +61,13 @@
** the config.h file.
*/
#if (HAVE_LRINTF)
/* With GCC, when SSE is available, the fastest conversion is cvtss2si. */
#if defined(__GNUC__) && defined(__SSE__)
#include <xmmintrin.h>
static OPUS_INLINE opus_int32 float2int(float x) {return _mm_cvt_ss2si(_mm_set_ss(x));}
#elif defined(HAVE_LRINTF)
/* These defines enable functionality introduced with the 1999 ISO C
** standard. They must be defined before the inclusion of math.h to
@ -90,14 +96,14 @@
#include <math.h>
#define float2int(x) lrint(x)
#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && (defined (WIN64) || defined (_WIN64))
#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && defined (_M_X64)
#include <xmmintrin.h>
__inline long int float2int(float value)
{
return _mm_cvtss_si32(_mm_load_ss(&value));
}
#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && (defined (WIN32) || defined (_WIN32))
#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && defined (_M_IX86)
#include <math.h>
/* Win32 doesn't seem to have these functions.

View File

@ -47,64 +47,56 @@
static void kf_bfly2(
kiss_fft_cpx * Fout,
const size_t fstride,
const kiss_fft_state *st,
int m,
int N,
int mm
int N
)
{
kiss_fft_cpx * Fout2;
const kiss_twiddle_cpx * tw1;
int i,j;
kiss_fft_cpx * Fout_beg = Fout;
for (i=0;i<N;i++)
int i;
(void)m;
#ifdef CUSTOM_MODES
if (m==1)
{
Fout = Fout_beg + i*mm;
Fout2 = Fout + m;
tw1 = st->twiddles;
for(j=0;j<m;j++)
celt_assert(m==1);
for (i=0;i<N;i++)
{
kiss_fft_cpx t;
Fout->r = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1);
Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1);
C_MUL (t, *Fout2 , *tw1);
tw1 += fstride;
Fout2 = Fout + 1;
t = *Fout2;
C_SUB( *Fout2 , *Fout , t );
C_ADDTO( *Fout , t );
++Fout2;
++Fout;
Fout += 2;
}
}
}
static void ki_bfly2(
kiss_fft_cpx * Fout,
const size_t fstride,
const kiss_fft_state *st,
int m,
int N,
int mm
)
{
kiss_fft_cpx * Fout2;
const kiss_twiddle_cpx * tw1;
kiss_fft_cpx t;
int i,j;
kiss_fft_cpx * Fout_beg = Fout;
for (i=0;i<N;i++)
} else
#endif
{
Fout = Fout_beg + i*mm;
Fout2 = Fout + m;
tw1 = st->twiddles;
for(j=0;j<m;j++)
opus_val16 tw;
tw = QCONST16(0.7071067812f, 15);
/* We know that m==4 here because the radix-2 is just after a radix-4 */
celt_assert(m==4);
for (i=0;i<N;i++)
{
C_MULC (t, *Fout2 , *tw1);
tw1 += fstride;
C_SUB( *Fout2 , *Fout , t );
C_ADDTO( *Fout , t );
++Fout2;
++Fout;
kiss_fft_cpx t;
Fout2 = Fout + 4;
t = Fout2[0];
C_SUB( Fout2[0] , Fout[0] , t );
C_ADDTO( Fout[0] , t );
t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
C_SUB( Fout2[1] , Fout[1] , t );
C_ADDTO( Fout[1] , t );
t.r = Fout2[2].i;
t.i = -Fout2[2].r;
C_SUB( Fout2[2] , Fout[2] , t );
C_ADDTO( Fout[2] , t );
t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
C_SUB( Fout2[3] , Fout[3] , t );
C_ADDTO( Fout[3] , t );
Fout += 8;
}
}
}
@ -118,88 +110,66 @@ static void kf_bfly4(
int mm
)
{
const kiss_twiddle_cpx *tw1,*tw2,*tw3;
kiss_fft_cpx scratch[6];
const size_t m2=2*m;
const size_t m3=3*m;
int i, j;
int i;
kiss_fft_cpx * Fout_beg = Fout;
for (i=0;i<N;i++)
if (m==1)
{
Fout = Fout_beg + i*mm;
tw3 = tw2 = tw1 = st->twiddles;
for (j=0;j<m;j++)
/* Degenerate case where all the twiddles are 1. */
for (i=0;i<N;i++)
{
C_MUL4(scratch[0],Fout[m] , *tw1 );
C_MUL4(scratch[1],Fout[m2] , *tw2 );
C_MUL4(scratch[2],Fout[m3] , *tw3 );
kiss_fft_cpx scratch0, scratch1;
Fout->r = PSHR32(Fout->r, 2);
Fout->i = PSHR32(Fout->i, 2);
C_SUB( scratch[5] , *Fout, scratch[1] );
C_ADDTO(*Fout, scratch[1]);
C_ADD( scratch[3] , scratch[0] , scratch[2] );
C_SUB( scratch[4] , scratch[0] , scratch[2] );
C_SUB( Fout[m2], *Fout, scratch[3] );
tw1 += fstride;
tw2 += fstride*2;
tw3 += fstride*3;
C_ADDTO( *Fout , scratch[3] );
C_SUB( scratch0 , *Fout, Fout[2] );
C_ADDTO(*Fout, Fout[2]);
C_ADD( scratch1 , Fout[1] , Fout[3] );
C_SUB( Fout[2], *Fout, scratch1 );
C_ADDTO( *Fout , scratch1 );
C_SUB( scratch1 , Fout[1] , Fout[3] );
Fout[m].r = scratch[5].r + scratch[4].i;
Fout[m].i = scratch[5].i - scratch[4].r;
Fout[m3].r = scratch[5].r - scratch[4].i;
Fout[m3].i = scratch[5].i + scratch[4].r;
++Fout;
Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
Fout+=4;
}
} else {
int j;
kiss_fft_cpx scratch[6];
const kiss_twiddle_cpx *tw1,*tw2,*tw3;
const int m2=2*m;
const int m3=3*m;
kiss_fft_cpx * Fout_beg = Fout;
for (i=0;i<N;i++)
{
Fout = Fout_beg + i*mm;
tw3 = tw2 = tw1 = st->twiddles;
/* m is guaranteed to be a multiple of 4. */
for (j=0;j<m;j++)
{
C_MUL(scratch[0],Fout[m] , *tw1 );
C_MUL(scratch[1],Fout[m2] , *tw2 );
C_MUL(scratch[2],Fout[m3] , *tw3 );
C_SUB( scratch[5] , *Fout, scratch[1] );
C_ADDTO(*Fout, scratch[1]);
C_ADD( scratch[3] , scratch[0] , scratch[2] );
C_SUB( scratch[4] , scratch[0] , scratch[2] );
C_SUB( Fout[m2], *Fout, scratch[3] );
tw1 += fstride;
tw2 += fstride*2;
tw3 += fstride*3;
C_ADDTO( *Fout , scratch[3] );
Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
++Fout;
}
}
}
}
static void ki_bfly4(
kiss_fft_cpx * Fout,
const size_t fstride,
const kiss_fft_state *st,
int m,
int N,
int mm
)
{
const kiss_twiddle_cpx *tw1,*tw2,*tw3;
kiss_fft_cpx scratch[6];
const size_t m2=2*m;
const size_t m3=3*m;
int i, j;
kiss_fft_cpx * Fout_beg = Fout;
for (i=0;i<N;i++)
{
Fout = Fout_beg + i*mm;
tw3 = tw2 = tw1 = st->twiddles;
for (j=0;j<m;j++)
{
C_MULC(scratch[0],Fout[m] , *tw1 );
C_MULC(scratch[1],Fout[m2] , *tw2 );
C_MULC(scratch[2],Fout[m3] , *tw3 );
C_SUB( scratch[5] , *Fout, scratch[1] );
C_ADDTO(*Fout, scratch[1]);
C_ADD( scratch[3] , scratch[0] , scratch[2] );
C_SUB( scratch[4] , scratch[0] , scratch[2] );
C_SUB( Fout[m2], *Fout, scratch[3] );
tw1 += fstride;
tw2 += fstride*2;
tw3 += fstride*3;
C_ADDTO( *Fout , scratch[3] );
Fout[m].r = scratch[5].r - scratch[4].i;
Fout[m].i = scratch[5].i + scratch[4].r;
Fout[m3].r = scratch[5].r + scratch[4].i;
Fout[m3].i = scratch[5].i - scratch[4].r;
++Fout;
}
}
}
#ifndef RADIX_TWO_ONLY
@ -220,14 +190,19 @@ static void kf_bfly3(
kiss_twiddle_cpx epi3;
kiss_fft_cpx * Fout_beg = Fout;
#ifdef FIXED_POINT
/*epi3.r = -16384;*/ /* Unused */
epi3.i = -28378;
#else
epi3 = st->twiddles[fstride*m];
#endif
for (i=0;i<N;i++)
{
Fout = Fout_beg + i*mm;
tw1=tw2=st->twiddles;
/* For non-custom modes, m is guaranteed to be a multiple of 4. */
k=m;
do {
C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
C_MUL(scratch[1],Fout[m] , *tw1);
C_MUL(scratch[2],Fout[m2] , *tw2);
@ -237,74 +212,26 @@ static void kf_bfly3(
tw1 += fstride;
tw2 += fstride*2;
Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));
C_MULBYSCALAR( scratch[0] , epi3.i );
C_ADDTO(*Fout,scratch[3]);
Fout[m2].r = Fout[m].r + scratch[0].i;
Fout[m2].i = Fout[m].i - scratch[0].r;
Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);
Fout[m].r -= scratch[0].i;
Fout[m].i += scratch[0].r;
Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);
++Fout;
} while(--k);
}
}
static void ki_bfly3(
kiss_fft_cpx * Fout,
const size_t fstride,
const kiss_fft_state *st,
int m,
int N,
int mm
)
{
int i, k;
const size_t m2 = 2*m;
const kiss_twiddle_cpx *tw1,*tw2;
kiss_fft_cpx scratch[5];
kiss_twiddle_cpx epi3;
kiss_fft_cpx * Fout_beg = Fout;
epi3 = st->twiddles[fstride*m];
for (i=0;i<N;i++)
{
Fout = Fout_beg + i*mm;
tw1=tw2=st->twiddles;
k=m;
do{
C_MULC(scratch[1],Fout[m] , *tw1);
C_MULC(scratch[2],Fout[m2] , *tw2);
C_ADD(scratch[3],scratch[1],scratch[2]);
C_SUB(scratch[0],scratch[1],scratch[2]);
tw1 += fstride;
tw2 += fstride*2;
Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
C_MULBYSCALAR( scratch[0] , -epi3.i );
C_ADDTO(*Fout,scratch[3]);
Fout[m2].r = Fout[m].r + scratch[0].i;
Fout[m2].i = Fout[m].i - scratch[0].r;
Fout[m].r -= scratch[0].i;
Fout[m].i += scratch[0].r;
++Fout;
}while(--k);
}
}
#ifndef OVERRIDE_kf_bfly5
static void kf_bfly5(
kiss_fft_cpx * Fout,
const size_t fstride,
@ -317,13 +244,19 @@ static void kf_bfly5(
kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
int i, u;
kiss_fft_cpx scratch[13];
const kiss_twiddle_cpx * twiddles = st->twiddles;
const kiss_twiddle_cpx *tw;
kiss_twiddle_cpx ya,yb;
kiss_fft_cpx * Fout_beg = Fout;
ya = twiddles[fstride*m];
yb = twiddles[fstride*2*m];
#ifdef FIXED_POINT
ya.r = 10126;
ya.i = -31164;
yb.r = -26510;
yb.i = -19261;
#else
ya = st->twiddles[fstride*m];
yb = st->twiddles[fstride*2*m];
#endif
tw=st->twiddles;
for (i=0;i<N;i++)
@ -335,8 +268,8 @@ static void kf_bfly5(
Fout3=Fout0+3*m;
Fout4=Fout0+4*m;
/* For non-custom modes, m is guaranteed to be a multiple of 4. */
for ( u=0; u<m; ++u ) {
C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
scratch[0] = *Fout0;
C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
@ -349,22 +282,22 @@ static void kf_bfly5(
C_ADD( scratch[8],scratch[2],scratch[3]);
C_SUB( scratch[9],scratch[2],scratch[3]);
Fout0->r += scratch[7].r + scratch[8].r;
Fout0->i += scratch[7].i + scratch[8].i;
Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));
scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));
scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
scratch[6].r = ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));
C_SUB(*Fout1,scratch[5],scratch[6]);
C_ADD(*Fout4,scratch[5],scratch[6]);
scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));
C_ADD(*Fout2,scratch[11],scratch[12]);
C_SUB(*Fout3,scratch[11],scratch[12]);
@ -373,74 +306,8 @@ static void kf_bfly5(
}
}
}
#endif /* OVERRIDE_kf_bfly5 */
static void ki_bfly5(
kiss_fft_cpx * Fout,
const size_t fstride,
const kiss_fft_state *st,
int m,
int N,
int mm
)
{
kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
int i, u;
kiss_fft_cpx scratch[13];
const kiss_twiddle_cpx * twiddles = st->twiddles;
const kiss_twiddle_cpx *tw;
kiss_twiddle_cpx ya,yb;
kiss_fft_cpx * Fout_beg = Fout;
ya = twiddles[fstride*m];
yb = twiddles[fstride*2*m];
tw=st->twiddles;
for (i=0;i<N;i++)
{
Fout = Fout_beg + i*mm;
Fout0=Fout;
Fout1=Fout0+m;
Fout2=Fout0+2*m;
Fout3=Fout0+3*m;
Fout4=Fout0+4*m;
for ( u=0; u<m; ++u ) {
scratch[0] = *Fout0;
C_MULC(scratch[1] ,*Fout1, tw[u*fstride]);
C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]);
C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]);
C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]);
C_ADD( scratch[7],scratch[1],scratch[4]);
C_SUB( scratch[10],scratch[1],scratch[4]);
C_ADD( scratch[8],scratch[2],scratch[3]);
C_SUB( scratch[9],scratch[2],scratch[3]);
Fout0->r += scratch[7].r + scratch[8].r;
Fout0->i += scratch[7].i + scratch[8].i;
scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i);
scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i);
C_SUB(*Fout1,scratch[5],scratch[6]);
C_ADD(*Fout4,scratch[5],scratch[6]);
scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i);
scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i);
C_ADD(*Fout2,scratch[11],scratch[12]);
C_SUB(*Fout3,scratch[11],scratch[12]);
++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
}
}
}
#endif
@ -488,6 +355,9 @@ static
int kf_factor(int n,opus_int16 * facbuf)
{
int p=4;
int i;
int stages=0;
int nbak = n;
/*factor out powers of 4, powers of 2, then any remaining primes */
do {
@ -509,9 +379,30 @@ int kf_factor(int n,opus_int16 * facbuf)
{
return 0;
}
*facbuf++ = p;
*facbuf++ = n;
facbuf[2*stages] = p;
if (p==2 && stages > 1)
{
facbuf[2*stages] = 4;
facbuf[2] = 2;
}
stages++;
} while (n > 1);
n = nbak;
/* Reverse the order to get the radix 4 at the end, so we can use the
fast degenerate case. It turns out that reversing the order also
improves the noise behaviour. */
for (i=0;i<stages/2;i++)
{
int tmp;
tmp = facbuf[2*i];
facbuf[2*i] = facbuf[2*(stages-i-1)];
facbuf[2*(stages-i-1)] = tmp;
}
for (i=0;i<stages;i++)
{
n /= facbuf[2*i];
facbuf[2*i+1] = n;
}
return 1;
}
@ -532,13 +423,19 @@ static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
#endif
}
int opus_fft_alloc_arch_c(kiss_fft_state *st) {
(void)st;
return 0;
}
/*
*
* Allocates all necessary storage space for the fft and ifft.
* The return value is a contiguous block of memory. As such,
* It can be freed with free().
* */
kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base)
kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,
const kiss_fft_state *base, int arch)
{
kiss_fft_state *st=NULL;
size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
@ -555,14 +452,20 @@ kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, co
kiss_twiddle_cpx *twiddles;
st->nfft=nfft;
#ifndef FIXED_POINT
#ifdef FIXED_POINT
st->scale_shift = celt_ilog2(st->nfft);
if (st->nfft == 1<<st->scale_shift)
st->scale = Q15ONE;
else
st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);
#else
st->scale = 1.f/nfft;
#endif
if (base != NULL)
{
st->twiddles = base->twiddles;
st->shift = 0;
while (nfft<<st->shift != base->nfft && st->shift < 32)
while (st->shift < 32 && nfft<<st->shift != base->nfft)
st->shift++;
if (st->shift>=32)
goto fail;
@ -581,22 +484,31 @@ kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, co
if (st->bitrev==NULL)
goto fail;
compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
/* Initialize architecture specific fft parameters */
if (opus_fft_alloc_arch(st, arch))
goto fail;
}
return st;
fail:
opus_fft_free(st);
opus_fft_free(st, arch);
return NULL;
}
kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem )
kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch)
{
return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL);
return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch);
}
void opus_fft_free(const kiss_fft_state *cfg)
void opus_fft_free_arch_c(kiss_fft_state *st) {
(void)st;
}
void opus_fft_free(const kiss_fft_state *cfg, int arch)
{
if (cfg)
{
opus_fft_free_arch((kiss_fft_state *)cfg, arch);
opus_free((opus_int16*)cfg->bitrev);
if (cfg->shift < 0)
opus_free((kiss_twiddle_cpx*)cfg->twiddles);
@ -606,7 +518,7 @@ void opus_fft_free(const kiss_fft_state *cfg)
#endif /* CUSTOM_MODES */
void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)
{
int m2, m;
int p;
@ -618,17 +530,6 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou
/* st->shift can be -1 */
shift = st->shift>0 ? st->shift : 0;
celt_assert2 (fin != fout, "In-place FFT not supported");
/* Bit-reverse the input */
for (i=0;i<st->nfft;i++)
{
fout[st->bitrev[i]] = fin[i];
#ifndef FIXED_POINT
fout[st->bitrev[i]].r *= st->scale;
fout[st->bitrev[i]].i *= st->scale;
#endif
}
fstride[0] = 1;
L=0;
do {
@ -647,7 +548,7 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou
switch (st->factors[2*i])
{
case 2:
kf_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
kf_bfly2(fout, m, fstride[i]);
break;
case 4:
kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
@ -665,55 +566,39 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou
}
}
void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
{
int m2, m;
int p;
int L;
int fstride[MAXFACTORS];
int i;
int shift;
opus_val16 scale;
#ifdef FIXED_POINT
/* Allows us to scale with MULT16_32_Q16(), which is faster than
MULT16_32_Q15() on ARM. */
int scale_shift = st->scale_shift-1;
#endif
scale = st->scale;
/* st->shift can be -1 */
shift = st->shift>0 ? st->shift : 0;
celt_assert2 (fin != fout, "In-place FFT not supported");
/* Bit-reverse the input */
for (i=0;i<st->nfft;i++)
{
kiss_fft_cpx x = fin[i];
fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift);
fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift);
}
opus_fft_impl(st, fout);
}
void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
{
int i;
celt_assert2 (fin != fout, "In-place FFT not supported");
/* Bit-reverse the input */
for (i=0;i<st->nfft;i++)
fout[st->bitrev[i]] = fin[i];
fstride[0] = 1;
L=0;
do {
p = st->factors[2*L];
m = st->factors[2*L+1];
fstride[L+1] = fstride[L]*p;
L++;
} while(m!=1);
m = st->factors[2*L-1];
for (i=L-1;i>=0;i--)
{
if (i!=0)
m2 = st->factors[2*i-1];
else
m2 = 1;
switch (st->factors[2*i])
{
case 2:
ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
break;
case 4:
ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
break;
#ifndef RADIX_TWO_ONLY
case 3:
ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
break;
case 5:
ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
break;
#endif
}
m = m2;
}
for (i=0;i<st->nfft;i++)
fout[i].i = -fout[i].i;
opus_fft_impl(st, fout);
for (i=0;i<st->nfft;i++)
fout[i].i = -fout[i].i;
}

View File

@ -32,6 +32,7 @@
#include <stdlib.h>
#include <math.h>
#include "arch.h"
#include "cpu_support.h"
#ifdef __cplusplus
extern "C" {
@ -77,17 +78,28 @@ typedef struct {
4*4*4*2
*/
typedef struct arch_fft_state{
int is_supported;
void *priv;
} arch_fft_state;
typedef struct kiss_fft_state{
int nfft;
#ifndef FIXED_POINT
kiss_fft_scalar scale;
opus_val16 scale;
#ifdef FIXED_POINT
int scale_shift;
#endif
int shift;
opus_int16 factors[2*MAXFACTORS];
const opus_int16 *bitrev;
const kiss_twiddle_cpx *twiddles;
arch_fft_state *arch_fft;
} kiss_fft_state;
#if defined(HAVE_ARM_NE10)
#include "arm/fft_arm.h"
#endif
/*typedef struct kiss_fft_state* kiss_fft_cfg;*/
/**
@ -113,9 +125,9 @@ typedef struct kiss_fft_state{
* buffer size in *lenmem.
* */
kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base);
kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base, int arch);
kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem);
kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch);
/**
* opus_fft(cfg,in_out_buf)
@ -127,10 +139,59 @@ kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem);
* Note that each element is complex and can be accessed like
f[k].r and f[k].i
* */
void opus_fft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
void opus_fft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
void opus_ifft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
void opus_fft_free(const kiss_fft_state *cfg);
void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout);
void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout);
void opus_fft_free(const kiss_fft_state *cfg, int arch);
void opus_fft_free_arch_c(kiss_fft_state *st);
int opus_fft_alloc_arch_c(kiss_fft_state *st);
#if !defined(OVERRIDE_OPUS_FFT)
/* Is run-time CPU detection enabled on this platform? */
#if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10))
extern int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(
kiss_fft_state *st);
#define opus_fft_alloc_arch(_st, arch) \
((*OPUS_FFT_ALLOC_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st))
extern void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(
kiss_fft_state *st);
#define opus_fft_free_arch(_st, arch) \
((*OPUS_FFT_FREE_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st))
extern void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
const kiss_fft_cpx *fin, kiss_fft_cpx *fout);
#define opus_fft(_cfg, _fin, _fout, arch) \
((*OPUS_FFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout))
extern void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
const kiss_fft_cpx *fin, kiss_fft_cpx *fout);
#define opus_ifft(_cfg, _fin, _fout, arch) \
((*OPUS_IFFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout))
#else /* else for if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
#define opus_fft_alloc_arch(_st, arch) \
((void)(arch), opus_fft_alloc_arch_c(_st))
#define opus_fft_free_arch(_st, arch) \
((void)(arch), opus_fft_free_arch_c(_st))
#define opus_fft(_cfg, _fin, _fout, arch) \
((void)(arch), opus_fft_c(_cfg, _fin, _fout))
#define opus_ifft(_cfg, _fin, _fout, arch) \
((void)(arch), opus_ifft_c(_cfg, _fin, _fout))
#endif /* end if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
#endif /* end if !defined(OVERRIDE_OPUS_FFT) */
#ifdef __cplusplus
}

View File

@ -164,7 +164,7 @@ opus_val16 celt_cos_norm(opus_val32 x)
{
return _celt_cos_pi_2(EXTRACT16(x));
} else {
return NEG32(_celt_cos_pi_2(EXTRACT16(65536-x)));
return NEG16(_celt_cos_pi_2(EXTRACT16(65536-x)));
}
} else {
if (x&0x0000ffff)

View File

@ -38,11 +38,44 @@
#include "entcode.h"
#include "os_support.h"
#define PI 3.141592653f
/* Multiplies two 16-bit fractional values. Bit-exactness of this macro is important */
#define FRAC_MUL16(a,b) ((16384+((opus_int32)(opus_int16)(a)*(opus_int16)(b)))>>15)
unsigned isqrt32(opus_uint32 _val);
/* CELT doesn't need it for fixed-point, by analysis.c does. */
#if !defined(FIXED_POINT) || defined(ANALYSIS_C)
#define cA 0.43157974f
#define cB 0.67848403f
#define cC 0.08595542f
#define cE ((float)PI/2)
static OPUS_INLINE float fast_atan2f(float y, float x) {
float x2, y2;
x2 = x*x;
y2 = y*y;
/* For very small values, we don't care about the answer, so
we can just return 0. */
if (x2 + y2 < 1e-18f)
{
return 0;
}
if(x2<y2){
float den = (y2 + cB*x2) * (y2 + cC*x2);
return -x*y*(y2 + cA*x2) / den + (y<0 ? -cE : cE);
}else{
float den = (x2 + cB*y2) * (x2 + cC*y2);
return x*y*(x2 + cA*y2) / den + (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE);
}
}
#undef cA
#undef cB
#undef cC
#undef cD
#endif
#ifndef OVERRIDE_CELT_MAXABS16
static OPUS_INLINE opus_val32 celt_maxabs16(const opus_val16 *x, int len)
{
@ -80,7 +113,6 @@ static OPUS_INLINE opus_val32 celt_maxabs32(const opus_val32 *x, int len)
#ifndef FIXED_POINT
#define PI 3.141592653f
#define celt_sqrt(x) ((float)sqrt(x))
#define celt_rsqrt(x) (1.f/celt_sqrt(x))
#define celt_rsqrt_norm(x) (celt_rsqrt(x))

View File

@ -53,76 +53,100 @@
#include "mathops.h"
#include "stack_alloc.h"
#if defined(MIPSr1_ASM)
#include "mips/mdct_mipsr1.h"
#endif
#ifdef CUSTOM_MODES
int clt_mdct_init(mdct_lookup *l,int N, int maxshift)
int clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch)
{
int i;
int N4;
kiss_twiddle_scalar *trig;
#if defined(FIXED_POINT)
int shift;
int N2=N>>1;
#endif
l->n = N;
N4 = N>>2;
l->maxshift = maxshift;
for (i=0;i<=maxshift;i++)
{
if (i==0)
l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0);
l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0, arch);
else
l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0]);
l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0], arch);
#ifndef ENABLE_TI_DSPLIB55
if (l->kfft[i]==NULL)
return 0;
#endif
}
l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N4+1)*sizeof(kiss_twiddle_scalar));
l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N-(N2>>maxshift))*sizeof(kiss_twiddle_scalar));
if (l->trig==NULL)
return 0;
/* We have enough points that sine isn't necessary */
for (shift=0;shift<=maxshift;shift++)
{
/* We have enough points that sine isn't necessary */
#if defined(FIXED_POINT)
for (i=0;i<=N4;i++)
trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2),N));
#if 1
for (i=0;i<N2;i++)
trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2+16384),N));
#else
for (i=0;i<=N4;i++)
trig[i] = (kiss_twiddle_scalar)cos(2*PI*i/N);
for (i=0;i<N2;i++)
trig[i] = (kiss_twiddle_scalar)MAX32(-32767,MIN32(32767,floor(.5+32768*cos(2*M_PI*(i+.125)/N))));
#endif
#else
for (i=0;i<N2;i++)
trig[i] = (kiss_twiddle_scalar)cos(2*PI*(i+.125)/N);
#endif
trig += N2;
N2 >>= 1;
N >>= 1;
}
return 1;
}
void clt_mdct_clear(mdct_lookup *l)
void clt_mdct_clear(mdct_lookup *l, int arch)
{
int i;
for (i=0;i<=l->maxshift;i++)
opus_fft_free(l->kfft[i]);
opus_fft_free(l->kfft[i], arch);
opus_free((kiss_twiddle_scalar*)l->trig);
}
#endif /* CUSTOM_MODES */
/* Forward MDCT trashes the input array */
void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window, int overlap, int shift, int stride)
#ifndef OVERRIDE_clt_mdct_forward
void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window, int overlap, int shift, int stride, int arch)
{
int i;
int N, N2, N4;
kiss_twiddle_scalar sine;
VARDECL(kiss_fft_scalar, f);
VARDECL(kiss_fft_scalar, f2);
VARDECL(kiss_fft_cpx, f2);
const kiss_fft_state *st = l->kfft[shift];
const kiss_twiddle_scalar *trig;
opus_val16 scale;
#ifdef FIXED_POINT
/* Allows us to scale with MULT16_32_Q16(), which is faster than
MULT16_32_Q15() on ARM. */
int scale_shift = st->scale_shift-1;
#endif
SAVE_STACK;
(void)arch;
scale = st->scale;
N = l->n;
N >>= shift;
trig = l->trig;
for (i=0;i<shift;i++)
{
N >>= 1;
trig += N;
}
N2 = N>>1;
N4 = N>>2;
ALLOC(f, N2, kiss_fft_scalar);
ALLOC(f2, N2, kiss_fft_scalar);
/* sin(x) ~= x here */
#ifdef FIXED_POINT
sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
#else
sine = (kiss_twiddle_scalar)2*PI*(.125f)/N;
#endif
ALLOC(f2, N4, kiss_fft_cpx);
/* Consider the input to be composed of four blocks: [a, b, c, d] */
/* Window, shuffle, fold */
@ -167,123 +191,131 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
/* Pre-rotation */
{
kiss_fft_scalar * OPUS_RESTRICT yp = f;
const kiss_twiddle_scalar *t = &l->trig[0];
const kiss_twiddle_scalar *t = &trig[0];
for(i=0;i<N4;i++)
{
kiss_fft_cpx yc;
kiss_twiddle_scalar t0, t1;
kiss_fft_scalar re, im, yr, yi;
re = yp[0];
im = yp[1];
yr = -S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]);
yi = -S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]);
/* works because the cos is nearly one */
*yp++ = yr + S_MUL(yi,sine);
*yp++ = yi - S_MUL(yr,sine);
t0 = t[i];
t1 = t[N4+i];
re = *yp++;
im = *yp++;
yr = S_MUL(re,t0) - S_MUL(im,t1);
yi = S_MUL(im,t0) + S_MUL(re,t1);
yc.r = yr;
yc.i = yi;
yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift);
yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift);
f2[st->bitrev[i]] = yc;
}
}
/* N/4 complex FFT, down-scales by 4/N */
opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2);
/* N/4 complex FFT, does not downscale anymore */
opus_fft_impl(st, f2);
/* Post-rotate */
{
/* Temp pointers to make it really clear to the compiler what we're doing */
const kiss_fft_scalar * OPUS_RESTRICT fp = f2;
const kiss_fft_cpx * OPUS_RESTRICT fp = f2;
kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
const kiss_twiddle_scalar *t = &l->trig[0];
const kiss_twiddle_scalar *t = &trig[0];
/* Temp pointers to make it really clear to the compiler what we're doing */
for(i=0;i<N4;i++)
{
kiss_fft_scalar yr, yi;
yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]);
yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]);
/* works because the cos is nearly one */
*yp1 = yr - S_MUL(yi,sine);
*yp2 = yi + S_MUL(yr,sine);;
fp += 2;
yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]);
yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]);
*yp1 = yr;
*yp2 = yi;
fp++;
yp1 += 2*stride;
yp2 -= 2*stride;
}
}
RESTORE_STACK;
}
#endif /* OVERRIDE_clt_mdct_forward */
void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride)
#ifndef OVERRIDE_clt_mdct_backward
void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch)
{
int i;
int N, N2, N4;
kiss_twiddle_scalar sine;
VARDECL(kiss_fft_scalar, f2);
SAVE_STACK;
const kiss_twiddle_scalar *trig;
(void) arch;
N = l->n;
N >>= shift;
trig = l->trig;
for (i=0;i<shift;i++)
{
N >>= 1;
trig += N;
}
N2 = N>>1;
N4 = N>>2;
ALLOC(f2, N2, kiss_fft_scalar);
/* sin(x) ~= x here */
#ifdef FIXED_POINT
sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
#else
sine = (kiss_twiddle_scalar)2*PI*(.125f)/N;
#endif
/* Pre-rotate */
{
/* Temp pointers to make it really clear to the compiler what we're doing */
const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
kiss_fft_scalar * OPUS_RESTRICT yp = f2;
const kiss_twiddle_scalar *t = &l->trig[0];
kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1);
const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];
const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev;
for(i=0;i<N4;i++)
{
int rev;
kiss_fft_scalar yr, yi;
yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]);
yi = -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]);
/* works because the cos is nearly one */
*yp++ = yr - S_MUL(yi,sine);
*yp++ = yi + S_MUL(yr,sine);
rev = *bitrev++;
yr = ADD32_ovflw(S_MUL(*xp2, t[i]), S_MUL(*xp1, t[N4+i]));
yi = SUB32_ovflw(S_MUL(*xp1, t[i]), S_MUL(*xp2, t[N4+i]));
/* We swap real and imag because we use an FFT instead of an IFFT. */
yp[2*rev+1] = yr;
yp[2*rev] = yi;
/* Storing the pre-rotation directly in the bitrev order. */
xp1+=2*stride;
xp2-=2*stride;
}
}
/* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */
opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)(out+(overlap>>1)));
opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1)));
/* Post-rotate and de-shuffle from both ends of the buffer at once to make
it in-place. */
{
kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1);
kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2;
const kiss_twiddle_scalar *t = &l->trig[0];
kiss_fft_scalar * yp0 = out+(overlap>>1);
kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2;
const kiss_twiddle_scalar *t = &trig[0];
/* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
middle pair will be computed twice. */
for(i=0;i<(N4+1)>>1;i++)
{
kiss_fft_scalar re, im, yr, yi;
kiss_twiddle_scalar t0, t1;
re = yp0[0];
im = yp0[1];
t0 = t[i<<shift];
t1 = t[(N4-i)<<shift];
/* We swap real and imag because we're using an FFT instead of an IFFT. */
re = yp0[1];
im = yp0[0];
t0 = t[i];
t1 = t[N4+i];
/* We'd scale up by 2 here, but instead it's done when mixing the windows */
yr = S_MUL(re,t0) - S_MUL(im,t1);
yi = S_MUL(im,t0) + S_MUL(re,t1);
re = yp1[0];
im = yp1[1];
/* works because the cos is nearly one */
yp0[0] = -(yr - S_MUL(yi,sine));
yp1[1] = yi + S_MUL(yr,sine);
yr = ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1));
yi = SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0));
/* We swap real and imag because we're using an FFT instead of an IFFT. */
re = yp1[1];
im = yp1[0];
yp0[0] = yr;
yp1[1] = yi;
t0 = t[(N4-i-1)<<shift];
t1 = t[(i+1)<<shift];
t0 = t[(N4-i-1)];
t1 = t[(N2-i-1)];
/* We'd scale up by 2 here, but instead it's done when mixing the windows */
yr = S_MUL(re,t0) - S_MUL(im,t1);
yi = S_MUL(im,t0) + S_MUL(re,t1);
/* works because the cos is nearly one */
yp1[0] = -(yr - S_MUL(yi,sine));
yp0[1] = yi + S_MUL(yr,sine);
yr = ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1));
yi = SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0));
yp1[0] = yr;
yp0[1] = yi;
yp0 += 2;
yp1 -= 2;
}
@ -301,11 +333,11 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
kiss_fft_scalar x1, x2;
x1 = *xp1;
x2 = *yp1;
*yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
*xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
*yp1++ = SUB32_ovflw(MULT16_32_Q15(*wp2, x2), MULT16_32_Q15(*wp1, x1));
*xp1-- = ADD32_ovflw(MULT16_32_Q15(*wp1, x2), MULT16_32_Q15(*wp2, x1));
wp1++;
wp2--;
}
}
RESTORE_STACK;
}
#endif /* OVERRIDE_clt_mdct_backward */

View File

@ -53,18 +53,60 @@ typedef struct {
const kiss_twiddle_scalar * OPUS_RESTRICT trig;
} mdct_lookup;
int clt_mdct_init(mdct_lookup *l,int N, int maxshift);
void clt_mdct_clear(mdct_lookup *l);
#if defined(HAVE_ARM_NE10)
#include "arm/mdct_arm.h"
#endif
int clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch);
void clt_mdct_clear(mdct_lookup *l, int arch);
/** Compute a forward MDCT and scale by 4/N, trashes the input array */
void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window, int overlap, int shift, int stride);
void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window, int overlap,
int shift, int stride, int arch);
/** Compute a backward MDCT (no scaling) and performs weighted overlap-add
(scales implicitly by 1/2) */
void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in,
void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride);
const opus_val16 * OPUS_RESTRICT window,
int overlap, int shift, int stride, int arch);
#if !defined(OVERRIDE_OPUS_MDCT)
/* Is run-time CPU detection enabled on this platform? */
#if defined(OPUS_HAVE_RTCD) && defined(HAVE_ARM_NE10)
extern void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(
const mdct_lookup *l, kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out, const opus_val16 *window,
int overlap, int shift, int stride, int arch);
#define clt_mdct_forward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
((*CLT_MDCT_FORWARD_IMPL[(arch)&OPUS_ARCHMASK])(_l, _in, _out, \
_window, _overlap, _shift, \
_stride, _arch))
extern void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(
const mdct_lookup *l, kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out, const opus_val16 *window,
int overlap, int shift, int stride, int arch);
#define clt_mdct_backward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
(*CLT_MDCT_BACKWARD_IMPL[(arch)&OPUS_ARCHMASK])(_l, _in, _out, \
_window, _overlap, _shift, \
_stride, _arch)
#else /* if defined(OPUS_HAVE_RTCD) && defined(HAVE_ARM_NE10) */
#define clt_mdct_forward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
clt_mdct_forward_c(_l, _in, _out, _window, _overlap, _shift, _stride, _arch)
#define clt_mdct_backward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
clt_mdct_backward_c(_l, _in, _out, _window, _overlap, _shift, _stride, _arch)
#endif /* end if defined(OPUS_HAVE_RTCD) && defined(HAVE_ARM_NE10) && !defined(FIXED_POINT) */
#endif /* end if !defined(OVERRIDE_OPUS_MDCT) */
#endif

View File

@ -37,6 +37,7 @@
#include "os_support.h"
#include "stack_alloc.h"
#include "quant_bands.h"
#include "cpu_support.h"
static const opus_int16 eband5ms[] = {
/*0 200 400 600 800 1k 1.2 1.4 1.6 2k 2.4 2.8 3.2 4k 4.8 5.6 6.8 8k 9.6 12k 15.6 */
@ -229,6 +230,7 @@ CELTMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error)
opus_val16 *window;
opus_int16 *logN;
int LM;
int arch = opus_select_arch();
ALLOC_STACK;
#if !defined(VAR_ARRAYS) && !defined(USE_ALLOCA)
if (global_stack==NULL)
@ -389,7 +391,7 @@ CELTMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error)
compute_pulse_cache(mode, mode->maxLM);
if (clt_mdct_init(&mode->mdct, 2*mode->shortMdctSize*mode->nbShortMdcts,
mode->maxLM) == 0)
mode->maxLM, arch) == 0)
goto failure;
if (error)
@ -408,6 +410,8 @@ failure:
#ifdef CUSTOM_MODES
void opus_custom_mode_destroy(CELTMode *mode)
{
int arch = opus_select_arch();
if (mode == NULL)
return;
#ifndef CUSTOM_MODES_ONLY
@ -431,7 +435,7 @@ void opus_custom_mode_destroy(CELTMode *mode)
opus_free((opus_int16*)mode->cache.index);
opus_free((unsigned char*)mode->cache.bits);
opus_free((unsigned char*)mode->cache.caps);
clt_mdct_clear(&mode->mdct);
clt_mdct_clear(&mode->mdct, arch);
opus_free((CELTMode *)mode);
}

View File

@ -39,14 +39,6 @@
#define MAX_PERIOD 1024
#ifndef OVERLAP
#define OVERLAP(mode) ((mode)->overlap)
#endif
#ifndef FRAMESIZE
#define FRAMESIZE(mode) ((mode)->mdctSize)
#endif
typedef struct {
int size;
const opus_int16 *index;

View File

@ -67,18 +67,18 @@ static OPUS_INLINE void opus_free (void *ptr)
}
#endif
/** Copy n bytes of memory from src to dst. The 0* term provides compile-time type checking */
/** Copy n elements from src to dst. The 0* term provides compile-time type checking */
#ifndef OVERRIDE_OPUS_COPY
#define OPUS_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
#endif
/** Copy n bytes of memory from src to dst, allowing overlapping regions. The 0* term
/** Copy n elements from src to dst, allowing overlapping regions. The 0* term
provides compile-time type checking */
#ifndef OVERRIDE_OPUS_MOVE
#define OPUS_MOVE(dst, src, n) (memmove((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
#endif
/** Set n elements of dst to zero, starting at address s */
/** Set n elements of dst to zero */
#ifndef OVERRIDE_OPUS_CLEAR
#define OPUS_CLEAR(dst, n) (memset((dst), 0, (n)*sizeof(*(dst))))
#endif

View File

@ -214,25 +214,35 @@ void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x
celt_fir5(x_lp, lpc2, x_lp, len>>1, mem);
}
#if 0 /* This is a simple version of the pitch correlation that should work
well on DSPs like Blackfin and TI C5x/C6x */
/* Pure C implementation. */
#ifdef FIXED_POINT
opus_val32
#else
void
#endif
celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch)
#if defined(OVERRIDE_PITCH_XCORR)
celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch)
#else
celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch, int arch)
#endif
{
#if 0 /* This is a simple version of the pitch correlation that should work
well on DSPs like Blackfin and TI C5x/C6x */
int i, j;
#ifdef FIXED_POINT
opus_val32 maxcorr=1;
#endif
#if !defined(OVERRIDE_PITCH_XCORR)
(void)arch;
#endif
for (i=0;i<max_pitch;i++)
{
opus_val32 sum = 0;
for (j=0;j<len;j++)
sum = MAC16_16(sum, x[j],y[i+j]);
sum = MAC16_16(sum, _x[j], _y[i+j]);
xcorr[i] = sum;
#ifdef FIXED_POINT
maxcorr = MAX32(maxcorr, sum);
@ -241,30 +251,25 @@ celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int m
#ifdef FIXED_POINT
return maxcorr;
#endif
}
#else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
#ifdef FIXED_POINT
opus_val32
#else
void
#endif
celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)
{
int i,j;
int i;
/*The EDSP version requires that max_pitch is at least 1, and that _x is
32-bit aligned.
Since it's hard to put asserts in assembly, put them here.*/
celt_assert(max_pitch>0);
celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
#ifdef FIXED_POINT
opus_val32 maxcorr=1;
#endif
celt_assert(max_pitch>0);
celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
for (i=0;i<max_pitch-3;i+=4)
{
opus_val32 sum[4]={0,0,0,0};
xcorr_kernel(_x, _y+i, sum, len);
#if defined(OVERRIDE_PITCH_XCORR)
xcorr_kernel_c(_x, _y+i, sum, len);
#else
xcorr_kernel(_x, _y+i, sum, len, arch);
#endif
xcorr[i]=sum[0];
xcorr[i+1]=sum[1];
xcorr[i+2]=sum[2];
@ -279,9 +284,12 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr
/* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
for (;i<max_pitch;i++)
{
opus_val32 sum = 0;
for (j=0;j<len;j++)
sum = MAC16_16(sum, _x[j],_y[i+j]);
opus_val32 sum;
#if defined(OVERRIDE_PITCH_XCORR)
sum = celt_inner_prod_c(_x, _y+i, len);
#else
sum = celt_inner_prod(_x, _y+i, len, arch);
#endif
xcorr[i] = sum;
#ifdef FIXED_POINT
maxcorr = MAX32(maxcorr, sum);
@ -290,9 +298,9 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr
#ifdef FIXED_POINT
return maxcorr;
#endif
#endif
}
#endif
void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
int len, int max_pitch, int *pitch, int arch)
{
@ -361,12 +369,17 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
#endif
for (i=0;i<max_pitch>>1;i++)
{
opus_val32 sum=0;
opus_val32 sum;
xcorr[i] = 0;
if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2)
continue;
#ifdef FIXED_POINT
sum = 0;
for (j=0;j<len>>1;j++)
sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift);
#else
sum = celt_inner_prod_c(x_lp, y+i, len>>1);
#endif
xcorr[i] = MAX32(-1, sum);
#ifdef FIXED_POINT
maxcorr = MAX32(maxcorr, sum);
@ -399,9 +412,44 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
RESTORE_STACK;
}
#ifdef FIXED_POINT
static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy)
{
opus_val32 x2y2;
int sx, sy, shift;
opus_val32 g;
opus_val16 den;
if (xy == 0 || xx == 0 || yy == 0)
return 0;
sx = celt_ilog2(xx)-14;
sy = celt_ilog2(yy)-14;
shift = sx + sy;
x2y2 = MULT16_16_Q14(VSHR32(xx, sx), VSHR32(yy, sy));
if (shift & 1) {
if (x2y2 < 32768)
{
x2y2 <<= 1;
shift--;
} else {
x2y2 >>= 1;
shift++;
}
}
den = celt_rsqrt_norm(x2y2);
g = MULT16_32_Q15(den, xy);
g = VSHR32(g, (shift>>1)-1);
return EXTRACT16(MIN32(g, Q15ONE));
}
#else
static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy)
{
return xy/celt_sqrt(1+xx*yy);
}
#endif
static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2};
opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
int N, int *T0_, int prev_period, opus_val16 prev_gain)
int N, int *T0_, int prev_period, opus_val16 prev_gain, int arch)
{
int k, i, T, T0;
opus_val16 g, g0;
@ -426,7 +474,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
T = T0 = *T0_;
ALLOC(yy_lookup, maxperiod+1, opus_val32);
dual_inner_prod(x, x, x-T0, N, &xx, &xy);
dual_inner_prod(x, x, x-T0, N, &xx, &xy, arch);
yy_lookup[0] = xx;
yy=xx;
for (i=1;i<=maxperiod;i++)
@ -437,18 +485,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
yy = yy_lookup[T0];
best_xy = xy;
best_yy = yy;
#ifdef FIXED_POINT
{
opus_val32 x2y2;
int sh, t;
x2y2 = 1+HALF32(MULT32_32_Q31(xx,yy));
sh = celt_ilog2(x2y2)>>1;
t = VSHR32(x2y2, 2*(sh-7));
g = g0 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1);
}
#else
g = g0 = xy/celt_sqrt(1+xx*yy);
#endif
g = g0 = compute_pitch_gain(xy, xx, yy);
/* Look for any pitch at T/k */
for (k=2;k<=15;k++)
{
@ -456,7 +493,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
opus_val16 g1;
opus_val16 cont=0;
opus_val16 thresh;
T1 = (2*T0+k)/(2*k);
T1 = celt_udiv(2*T0+k, 2*k);
if (T1 < minperiod)
break;
/* Look for another strong correlation at T1b */
@ -468,27 +505,16 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
T1b = T0+T1;
} else
{
T1b = (2*second_check[k]*T0+k)/(2*k);
T1b = celt_udiv(2*second_check[k]*T0+k, 2*k);
}
dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2);
xy += xy2;
yy = yy_lookup[T1] + yy_lookup[T1b];
#ifdef FIXED_POINT
{
opus_val32 x2y2;
int sh, t;
x2y2 = 1+MULT32_32_Q31(xx,yy);
sh = celt_ilog2(x2y2)>>1;
t = VSHR32(x2y2, 2*(sh-7));
g1 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1);
}
#else
g1 = xy/celt_sqrt(1+2.f*xx*1.f*yy);
#endif
dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2, arch);
xy = HALF32(xy + xy2);
yy = HALF32(yy_lookup[T1] + yy_lookup[T1b]);
g1 = compute_pitch_gain(xy, xx, yy);
if (abs(T1-prev_period)<=1)
cont = prev_gain;
else if (abs(T1-prev_period)<=2 && 5*k*k < T0)
cont = HALF32(prev_gain);
cont = HALF16(prev_gain);
else
cont = 0;
thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont);
@ -513,13 +539,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
pg = SHR32(frac_div32(best_xy,best_yy+1),16);
for (k=0;k<3;k++)
{
int T1 = T+k-1;
xy = 0;
for (i=0;i<N;i++)
xy = MAC16_16(xy, x[i], x[i-T1]);
xcorr[k] = xy;
}
xcorr[k] = celt_inner_prod(x, x-(T+k-1), N, arch);
if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0]))
offset = 1;
else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2]))

View File

@ -37,11 +37,17 @@
#include "modes.h"
#include "cpu_support.h"
#if defined(__SSE__) && !defined(FIXED_POINT)
#if (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)) \
|| ((defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)) && defined(FIXED_POINT))
#include "x86/pitch_sse.h"
#endif
#if defined(OPUS_ARM_ASM) && defined(FIXED_POINT)
#if defined(MIPSr1_ASM)
#include "mips/pitch_mipsr1.h"
#endif
#if ((defined(OPUS_ARM_ASM) && defined(FIXED_POINT)) \
|| defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
# include "arm/pitch_arm.h"
#endif
@ -52,12 +58,12 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
int len, int max_pitch, int *pitch, int arch);
opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
int N, int *T0, int prev_period, opus_val16 prev_gain);
int N, int *T0, int prev_period, opus_val16 prev_gain, int arch);
/* OPT: This is the kernel you really want to optimize. It gets used a lot
by the prefilter and by the PLC. */
#ifndef OVERRIDE_XCORR_KERNEL
static OPUS_INLINE void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
static OPUS_INLINE void xcorr_kernel_c(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
{
int j;
opus_val16 y_0, y_1, y_2, y_3;
@ -122,10 +128,14 @@ static OPUS_INLINE void xcorr_kernel(const opus_val16 * x, const opus_val16 * y,
sum[3] = MAC16_16(sum[3],tmp,y_1);
}
}
#ifndef OVERRIDE_XCORR_KERNEL
#define xcorr_kernel(x, y, sum, len, arch) \
((void)(arch),xcorr_kernel_c(x, y, sum, len))
#endif /* OVERRIDE_XCORR_KERNEL */
#ifndef OVERRIDE_DUAL_INNER_PROD
static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
static OPUS_INLINE void dual_inner_prod_c(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
int N, opus_val32 *xy1, opus_val32 *xy2)
{
int i;
@ -139,8 +149,35 @@ static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y
*xy1 = xy01;
*xy2 = xy02;
}
#ifndef OVERRIDE_DUAL_INNER_PROD
# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \
((void)(arch),dual_inner_prod_c(x, y01, y02, N, xy1, xy2))
#endif
/*We make sure a C version is always available for cases where the overhead of
vectorization and passing around an arch flag aren't worth it.*/
static OPUS_INLINE opus_val32 celt_inner_prod_c(const opus_val16 *x,
const opus_val16 *y, int N)
{
int i;
opus_val32 xy=0;
for (i=0;i<N;i++)
xy = MAC16_16(xy, x[i], y[i]);
return xy;
}
#if !defined(OVERRIDE_CELT_INNER_PROD)
# define celt_inner_prod(x, y, N, arch) \
((void)(arch),celt_inner_prod_c(x, y, N))
#endif
#ifdef NON_STATIC_COMB_FILTER_CONST_C
void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
opus_val16 g10, opus_val16 g11, opus_val16 g12);
#endif
#ifdef FIXED_POINT
opus_val32
#else
@ -150,24 +187,14 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch);
#if !defined(OVERRIDE_PITCH_XCORR)
/*Is run-time CPU detection enabled on this platform?*/
# if defined(OPUS_HAVE_RTCD)
extern
# if defined(FIXED_POINT)
#ifdef FIXED_POINT
opus_val32
# else
#else
void
# endif
(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int, int);
#endif
celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch, int arch);
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
xcorr, len, max_pitch))
# else
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((void)(arch),celt_pitch_xcorr_c(_x, _y, xcorr, len, max_pitch))
# endif
#endif
#endif

View File

@ -292,7 +292,7 @@ void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
#endif
}
if (lfe)
max_decay=3;
max_decay = QCONST16(3.f,DB_SHIFT);
enc_start_state = *enc;
ALLOC(oldEBands_intra, C*m->nbEBands, opus_val16);
@ -418,6 +418,7 @@ void quant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *ol
offset = (q2-.5f)*(1<<(14-fine_quant[i]-1))*(1.f/16384);
#endif
oldEBands[i+c*m->nbEBands] += offset;
error[i+c*m->nbEBands] -= offset;
bits_left--;
} while (++c < C);
}
@ -547,9 +548,15 @@ void amp2Log2(const CELTMode *m, int effEnd, int end,
c=0;
do {
for (i=0;i<effEnd;i++)
{
bandLogE[i+c*m->nbEBands] =
celt_log2(SHL32(bandE[i+c*m->nbEBands],2))
celt_log2(bandE[i+c*m->nbEBands])
- SHL16((opus_val16)eMeans[i],6);
#ifdef FIXED_POINT
/* Compensate for bandE[] being Q12 but celt_log2() taking a Q14 input. */
bandLogE[i+c*m->nbEBands] += QCONST16(2.f, DB_SHIFT);
#endif
}
for (i=effEnd;i<end;i++)
bandLogE[c*m->nbEBands+i] = -QCONST16(14.f,DB_SHIFT);
} while (++c < C);

View File

@ -131,7 +131,7 @@ void compute_pulse_cache(CELTMode *m, int LM)
for (i=0;i<nbEntries;i++)
{
unsigned char *ptr = bits+entryI[i];
opus_int16 tmp[MAX_PULSES+1];
opus_int16 tmp[CELT_MAX_PULSES+1];
get_required_bits(tmp, entryN[i], get_pulses(entryK[i]), BITRES);
for (j=1;j<=entryK[i];j++)
ptr[j] = tmp[get_pulses(j)]-1;
@ -296,7 +296,7 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end,
done = 0;
for (j=end;j-->start;)
{
int tmp = bits1[j] + (lo*bits2[j]>>ALLOC_STEPS);
int tmp = bits1[j] + ((opus_int32)lo*bits2[j]>>ALLOC_STEPS);
if (tmp < thresh[j] && !done)
{
if (tmp >= alloc_floor)
@ -333,7 +333,7 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end,
/*Figure out how many left-over bits we would be adding to this band.
This can include bits we've stolen back from higher, skipped bands.*/
left = total-psum;
percoeff = left/(m->eBands[codedBands]-m->eBands[start]);
percoeff = celt_udiv(left, m->eBands[codedBands]-m->eBands[start]);
left -= (m->eBands[codedBands]-m->eBands[start])*percoeff;
rem = IMAX(left-(m->eBands[j]-m->eBands[start]),0);
band_width = m->eBands[codedBands]-m->eBands[j];
@ -348,12 +348,17 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end,
/*This if() block is the only part of the allocation function that
is not a mandatory part of the bitstream: any bands we choose to
skip here must be explicitly signaled.*/
/*Choose a threshold with some hysteresis to keep bands from
fluctuating in and out.*/
int depth_threshold;
/*We choose a threshold with some hysteresis to keep bands from
fluctuating in and out, but we try not to fold below a certain point. */
if (codedBands > 17)
depth_threshold = j<prev ? 7 : 9;
else
depth_threshold = 0;
#ifdef FUZZING
if ((rand()&0x1) == 0)
#else
if (codedBands<=start+2 || (band_bits > ((j<prev?7:9)*band_width<<LM<<BITRES)>>4 && j<=signalBandwidth))
if (codedBands<=start+2 || (band_bits > (depth_threshold*band_width<<LM<<BITRES)>>4 && j<=signalBandwidth))
#endif
{
ec_enc_bit_logp(ec, 1, 1);
@ -414,7 +419,7 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end,
/* Allocate the remaining bits */
left = total-psum;
percoeff = left/(m->eBands[codedBands]-m->eBands[start]);
percoeff = celt_udiv(left, m->eBands[codedBands]-m->eBands[start]);
left -= (m->eBands[codedBands]-m->eBands[start])*percoeff;
for (j=start;j<codedBands;j++)
bits[j] += ((int)percoeff*(m->eBands[j+1]-m->eBands[j]));
@ -465,7 +470,8 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end,
offset += NClogN>>3;
/* Divide with rounding */
ebits[j] = IMAX(0, (bits[j] + offset + (den<<(BITRES-1))) / (den<<BITRES));
ebits[j] = IMAX(0, (bits[j] + offset + (den<<(BITRES-1))));
ebits[j] = celt_udiv(ebits[j], den)>>BITRES;
/* Make sure not to bust */
if (C*ebits[j] > (bits[j]>>BITRES))

View File

@ -32,7 +32,7 @@
#define MAX_PSEUDO 40
#define LOG_MAX_PSEUDO 6
#define MAX_PULSES 128
#define CELT_MAX_PULSES 128
#define MAX_FINE_BITS 8

View File

@ -116,9 +116,11 @@
#else
#ifdef CELT_C
char *scratch_ptr=0;
char *global_stack=0;
#else
extern char *global_stack;
extern char *scratch_ptr;
#endif /* CELT_C */
#ifdef ENABLE_VALGRIND
@ -140,8 +142,12 @@ extern char *global_stack_top;
#define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1))
#define PUSH(stack, size, type) (ALIGN((stack),sizeof(type)/sizeof(char)),(stack)+=(size)*(sizeof(type)/sizeof(char)),(type*)((stack)-(size)*(sizeof(type)/sizeof(char))))
#if 0 /* Set this to 1 to instrument pseudostack usage */
#define RESTORE_STACK (printf("%ld %s:%d\n", global_stack-scratch_ptr, __FILE__, __LINE__),global_stack = _saved_stack)
#else
#define RESTORE_STACK (global_stack = _saved_stack)
#define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? opus_alloc_scratch(GLOBAL_STACK_SIZE) : global_stack); _saved_stack = global_stack;
#endif
#define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? (scratch_ptr=opus_alloc_scratch(GLOBAL_STACK_SIZE)) : global_stack); _saved_stack = global_stack;
#endif /* ENABLE_VALGRIND */

View File

@ -4,6 +4,11 @@
#include "modes.h"
#include "rate.h"
#ifdef HAVE_ARM_NE10
#define OVERRIDE_FFT 1
#include "static_modes_fixed_arm_ne10.h"
#endif
#ifndef DEF_WINDOW120
#define DEF_WINDOW120
static const opus_val16 window120[120] = {
@ -341,84 +346,84 @@ static const kiss_twiddle_cpx fft_twiddles48000_960[480] = {
#ifndef FFT_BITREV480
#define FFT_BITREV480
static const opus_int16 fft_bitrev480[480] = {
0, 120, 240, 360, 30, 150, 270, 390, 60, 180, 300, 420, 90, 210, 330,
450, 15, 135, 255, 375, 45, 165, 285, 405, 75, 195, 315, 435, 105, 225,
345, 465, 5, 125, 245, 365, 35, 155, 275, 395, 65, 185, 305, 425, 95,
215, 335, 455, 20, 140, 260, 380, 50, 170, 290, 410, 80, 200, 320, 440,
110, 230, 350, 470, 10, 130, 250, 370, 40, 160, 280, 400, 70, 190, 310,
430, 100, 220, 340, 460, 25, 145, 265, 385, 55, 175, 295, 415, 85, 205,
325, 445, 115, 235, 355, 475, 1, 121, 241, 361, 31, 151, 271, 391, 61,
181, 301, 421, 91, 211, 331, 451, 16, 136, 256, 376, 46, 166, 286, 406,
76, 196, 316, 436, 106, 226, 346, 466, 6, 126, 246, 366, 36, 156, 276,
396, 66, 186, 306, 426, 96, 216, 336, 456, 21, 141, 261, 381, 51, 171,
291, 411, 81, 201, 321, 441, 111, 231, 351, 471, 11, 131, 251, 371, 41,
161, 281, 401, 71, 191, 311, 431, 101, 221, 341, 461, 26, 146, 266, 386,
56, 176, 296, 416, 86, 206, 326, 446, 116, 236, 356, 476, 2, 122, 242,
362, 32, 152, 272, 392, 62, 182, 302, 422, 92, 212, 332, 452, 17, 137,
257, 377, 47, 167, 287, 407, 77, 197, 317, 437, 107, 227, 347, 467, 7,
127, 247, 367, 37, 157, 277, 397, 67, 187, 307, 427, 97, 217, 337, 457,
22, 142, 262, 382, 52, 172, 292, 412, 82, 202, 322, 442, 112, 232, 352,
472, 12, 132, 252, 372, 42, 162, 282, 402, 72, 192, 312, 432, 102, 222,
342, 462, 27, 147, 267, 387, 57, 177, 297, 417, 87, 207, 327, 447, 117,
237, 357, 477, 3, 123, 243, 363, 33, 153, 273, 393, 63, 183, 303, 423,
93, 213, 333, 453, 18, 138, 258, 378, 48, 168, 288, 408, 78, 198, 318,
438, 108, 228, 348, 468, 8, 128, 248, 368, 38, 158, 278, 398, 68, 188,
308, 428, 98, 218, 338, 458, 23, 143, 263, 383, 53, 173, 293, 413, 83,
203, 323, 443, 113, 233, 353, 473, 13, 133, 253, 373, 43, 163, 283, 403,
73, 193, 313, 433, 103, 223, 343, 463, 28, 148, 268, 388, 58, 178, 298,
418, 88, 208, 328, 448, 118, 238, 358, 478, 4, 124, 244, 364, 34, 154,
274, 394, 64, 184, 304, 424, 94, 214, 334, 454, 19, 139, 259, 379, 49,
169, 289, 409, 79, 199, 319, 439, 109, 229, 349, 469, 9, 129, 249, 369,
39, 159, 279, 399, 69, 189, 309, 429, 99, 219, 339, 459, 24, 144, 264,
384, 54, 174, 294, 414, 84, 204, 324, 444, 114, 234, 354, 474, 14, 134,
254, 374, 44, 164, 284, 404, 74, 194, 314, 434, 104, 224, 344, 464, 29,
149, 269, 389, 59, 179, 299, 419, 89, 209, 329, 449, 119, 239, 359, 479,
0, 96, 192, 288, 384, 32, 128, 224, 320, 416, 64, 160, 256, 352, 448,
8, 104, 200, 296, 392, 40, 136, 232, 328, 424, 72, 168, 264, 360, 456,
16, 112, 208, 304, 400, 48, 144, 240, 336, 432, 80, 176, 272, 368, 464,
24, 120, 216, 312, 408, 56, 152, 248, 344, 440, 88, 184, 280, 376, 472,
4, 100, 196, 292, 388, 36, 132, 228, 324, 420, 68, 164, 260, 356, 452,
12, 108, 204, 300, 396, 44, 140, 236, 332, 428, 76, 172, 268, 364, 460,
20, 116, 212, 308, 404, 52, 148, 244, 340, 436, 84, 180, 276, 372, 468,
28, 124, 220, 316, 412, 60, 156, 252, 348, 444, 92, 188, 284, 380, 476,
1, 97, 193, 289, 385, 33, 129, 225, 321, 417, 65, 161, 257, 353, 449,
9, 105, 201, 297, 393, 41, 137, 233, 329, 425, 73, 169, 265, 361, 457,
17, 113, 209, 305, 401, 49, 145, 241, 337, 433, 81, 177, 273, 369, 465,
25, 121, 217, 313, 409, 57, 153, 249, 345, 441, 89, 185, 281, 377, 473,
5, 101, 197, 293, 389, 37, 133, 229, 325, 421, 69, 165, 261, 357, 453,
13, 109, 205, 301, 397, 45, 141, 237, 333, 429, 77, 173, 269, 365, 461,
21, 117, 213, 309, 405, 53, 149, 245, 341, 437, 85, 181, 277, 373, 469,
29, 125, 221, 317, 413, 61, 157, 253, 349, 445, 93, 189, 285, 381, 477,
2, 98, 194, 290, 386, 34, 130, 226, 322, 418, 66, 162, 258, 354, 450,
10, 106, 202, 298, 394, 42, 138, 234, 330, 426, 74, 170, 266, 362, 458,
18, 114, 210, 306, 402, 50, 146, 242, 338, 434, 82, 178, 274, 370, 466,
26, 122, 218, 314, 410, 58, 154, 250, 346, 442, 90, 186, 282, 378, 474,
6, 102, 198, 294, 390, 38, 134, 230, 326, 422, 70, 166, 262, 358, 454,
14, 110, 206, 302, 398, 46, 142, 238, 334, 430, 78, 174, 270, 366, 462,
22, 118, 214, 310, 406, 54, 150, 246, 342, 438, 86, 182, 278, 374, 470,
30, 126, 222, 318, 414, 62, 158, 254, 350, 446, 94, 190, 286, 382, 478,
3, 99, 195, 291, 387, 35, 131, 227, 323, 419, 67, 163, 259, 355, 451,
11, 107, 203, 299, 395, 43, 139, 235, 331, 427, 75, 171, 267, 363, 459,
19, 115, 211, 307, 403, 51, 147, 243, 339, 435, 83, 179, 275, 371, 467,
27, 123, 219, 315, 411, 59, 155, 251, 347, 443, 91, 187, 283, 379, 475,
7, 103, 199, 295, 391, 39, 135, 231, 327, 423, 71, 167, 263, 359, 455,
15, 111, 207, 303, 399, 47, 143, 239, 335, 431, 79, 175, 271, 367, 463,
23, 119, 215, 311, 407, 55, 151, 247, 343, 439, 87, 183, 279, 375, 471,
31, 127, 223, 319, 415, 63, 159, 255, 351, 447, 95, 191, 287, 383, 479,
};
#endif
#ifndef FFT_BITREV240
#define FFT_BITREV240
static const opus_int16 fft_bitrev240[240] = {
0, 60, 120, 180, 15, 75, 135, 195, 30, 90, 150, 210, 45, 105, 165,
225, 5, 65, 125, 185, 20, 80, 140, 200, 35, 95, 155, 215, 50, 110,
170, 230, 10, 70, 130, 190, 25, 85, 145, 205, 40, 100, 160, 220, 55,
115, 175, 235, 1, 61, 121, 181, 16, 76, 136, 196, 31, 91, 151, 211,
46, 106, 166, 226, 6, 66, 126, 186, 21, 81, 141, 201, 36, 96, 156,
216, 51, 111, 171, 231, 11, 71, 131, 191, 26, 86, 146, 206, 41, 101,
161, 221, 56, 116, 176, 236, 2, 62, 122, 182, 17, 77, 137, 197, 32,
92, 152, 212, 47, 107, 167, 227, 7, 67, 127, 187, 22, 82, 142, 202,
37, 97, 157, 217, 52, 112, 172, 232, 12, 72, 132, 192, 27, 87, 147,
207, 42, 102, 162, 222, 57, 117, 177, 237, 3, 63, 123, 183, 18, 78,
138, 198, 33, 93, 153, 213, 48, 108, 168, 228, 8, 68, 128, 188, 23,
83, 143, 203, 38, 98, 158, 218, 53, 113, 173, 233, 13, 73, 133, 193,
28, 88, 148, 208, 43, 103, 163, 223, 58, 118, 178, 238, 4, 64, 124,
184, 19, 79, 139, 199, 34, 94, 154, 214, 49, 109, 169, 229, 9, 69,
129, 189, 24, 84, 144, 204, 39, 99, 159, 219, 54, 114, 174, 234, 14,
74, 134, 194, 29, 89, 149, 209, 44, 104, 164, 224, 59, 119, 179, 239,
0, 48, 96, 144, 192, 16, 64, 112, 160, 208, 32, 80, 128, 176, 224,
4, 52, 100, 148, 196, 20, 68, 116, 164, 212, 36, 84, 132, 180, 228,
8, 56, 104, 152, 200, 24, 72, 120, 168, 216, 40, 88, 136, 184, 232,
12, 60, 108, 156, 204, 28, 76, 124, 172, 220, 44, 92, 140, 188, 236,
1, 49, 97, 145, 193, 17, 65, 113, 161, 209, 33, 81, 129, 177, 225,
5, 53, 101, 149, 197, 21, 69, 117, 165, 213, 37, 85, 133, 181, 229,
9, 57, 105, 153, 201, 25, 73, 121, 169, 217, 41, 89, 137, 185, 233,
13, 61, 109, 157, 205, 29, 77, 125, 173, 221, 45, 93, 141, 189, 237,
2, 50, 98, 146, 194, 18, 66, 114, 162, 210, 34, 82, 130, 178, 226,
6, 54, 102, 150, 198, 22, 70, 118, 166, 214, 38, 86, 134, 182, 230,
10, 58, 106, 154, 202, 26, 74, 122, 170, 218, 42, 90, 138, 186, 234,
14, 62, 110, 158, 206, 30, 78, 126, 174, 222, 46, 94, 142, 190, 238,
3, 51, 99, 147, 195, 19, 67, 115, 163, 211, 35, 83, 131, 179, 227,
7, 55, 103, 151, 199, 23, 71, 119, 167, 215, 39, 87, 135, 183, 231,
11, 59, 107, 155, 203, 27, 75, 123, 171, 219, 43, 91, 139, 187, 235,
15, 63, 111, 159, 207, 31, 79, 127, 175, 223, 47, 95, 143, 191, 239,
};
#endif
#ifndef FFT_BITREV120
#define FFT_BITREV120
static const opus_int16 fft_bitrev120[120] = {
0, 30, 60, 90, 15, 45, 75, 105, 5, 35, 65, 95, 20, 50, 80,
110, 10, 40, 70, 100, 25, 55, 85, 115, 1, 31, 61, 91, 16, 46,
76, 106, 6, 36, 66, 96, 21, 51, 81, 111, 11, 41, 71, 101, 26,
56, 86, 116, 2, 32, 62, 92, 17, 47, 77, 107, 7, 37, 67, 97,
22, 52, 82, 112, 12, 42, 72, 102, 27, 57, 87, 117, 3, 33, 63,
93, 18, 48, 78, 108, 8, 38, 68, 98, 23, 53, 83, 113, 13, 43,
73, 103, 28, 58, 88, 118, 4, 34, 64, 94, 19, 49, 79, 109, 9,
39, 69, 99, 24, 54, 84, 114, 14, 44, 74, 104, 29, 59, 89, 119,
0, 24, 48, 72, 96, 8, 32, 56, 80, 104, 16, 40, 64, 88, 112,
4, 28, 52, 76, 100, 12, 36, 60, 84, 108, 20, 44, 68, 92, 116,
1, 25, 49, 73, 97, 9, 33, 57, 81, 105, 17, 41, 65, 89, 113,
5, 29, 53, 77, 101, 13, 37, 61, 85, 109, 21, 45, 69, 93, 117,
2, 26, 50, 74, 98, 10, 34, 58, 82, 106, 18, 42, 66, 90, 114,
6, 30, 54, 78, 102, 14, 38, 62, 86, 110, 22, 46, 70, 94, 118,
3, 27, 51, 75, 99, 11, 35, 59, 83, 107, 19, 43, 67, 91, 115,
7, 31, 55, 79, 103, 15, 39, 63, 87, 111, 23, 47, 71, 95, 119,
};
#endif
#ifndef FFT_BITREV60
#define FFT_BITREV60
static const opus_int16 fft_bitrev60[60] = {
0, 15, 30, 45, 5, 20, 35, 50, 10, 25, 40, 55, 1, 16, 31,
46, 6, 21, 36, 51, 11, 26, 41, 56, 2, 17, 32, 47, 7, 22,
37, 52, 12, 27, 42, 57, 3, 18, 33, 48, 8, 23, 38, 53, 13,
28, 43, 58, 4, 19, 34, 49, 9, 24, 39, 54, 14, 29, 44, 59,
0, 12, 24, 36, 48, 4, 16, 28, 40, 52, 8, 20, 32, 44, 56,
1, 13, 25, 37, 49, 5, 17, 29, 41, 53, 9, 21, 33, 45, 57,
2, 14, 26, 38, 50, 6, 18, 30, 42, 54, 10, 22, 34, 46, 58,
3, 15, 27, 39, 51, 7, 19, 31, 43, 55, 11, 23, 35, 47, 59,
};
#endif
@ -426,10 +431,17 @@ static const opus_int16 fft_bitrev60[60] = {
#define FFT_STATE48000_960_0
static const kiss_fft_state fft_state48000_960_0 = {
480, /* nfft */
17476, /* scale */
8, /* scale_shift */
-1, /* shift */
{4, 120, 4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, }, /* factors */
{5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, }, /* factors */
fft_bitrev480, /* bitrev */
fft_twiddles48000_960, /* bitrev */
#ifdef OVERRIDE_FFT
(arch_fft_state *)&cfg_arch_480,
#else
NULL,
#endif
};
#endif
@ -437,10 +449,17 @@ fft_twiddles48000_960, /* bitrev */
#define FFT_STATE48000_960_1
static const kiss_fft_state fft_state48000_960_1 = {
240, /* nfft */
17476, /* scale */
7, /* scale_shift */
1, /* shift */
{4, 60, 4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
{5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
fft_bitrev240, /* bitrev */
fft_twiddles48000_960, /* bitrev */
#ifdef OVERRIDE_FFT
(arch_fft_state *)&cfg_arch_240,
#else
NULL,
#endif
};
#endif
@ -448,10 +467,17 @@ fft_twiddles48000_960, /* bitrev */
#define FFT_STATE48000_960_2
static const kiss_fft_state fft_state48000_960_2 = {
120, /* nfft */
17476, /* scale */
6, /* scale_shift */
2, /* shift */
{4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
{5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
fft_bitrev120, /* bitrev */
fft_twiddles48000_960, /* bitrev */
#ifdef OVERRIDE_FFT
(arch_fft_state *)&cfg_arch_120,
#else
NULL,
#endif
};
#endif
@ -459,10 +485,17 @@ fft_twiddles48000_960, /* bitrev */
#define FFT_STATE48000_960_3
static const kiss_fft_state fft_state48000_960_3 = {
60, /* nfft */
17476, /* scale */
5, /* scale_shift */
3, /* shift */
{4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
{5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
fft_bitrev60, /* bitrev */
fft_twiddles48000_960, /* bitrev */
#ifdef OVERRIDE_FFT
(arch_fft_state *)&cfg_arch_60,
#else
NULL,
#endif
};
#endif
@ -470,104 +503,368 @@ fft_twiddles48000_960, /* bitrev */
#ifndef MDCT_TWIDDLES960
#define MDCT_TWIDDLES960
static const opus_val16 mdct_twiddles960[481] = {
32767, 32767, 32767, 32767, 32766,
32763, 32762, 32759, 32757, 32753,
32751, 32747, 32743, 32738, 32733,
32729, 32724, 32717, 32711, 32705,
32698, 32690, 32683, 32676, 32667,
32658, 32650, 32640, 32631, 32620,
32610, 32599, 32588, 32577, 32566,
32554, 32541, 32528, 32515, 32502,
32487, 32474, 32459, 32444, 32429,
32413, 32397, 32381, 32364, 32348,
32331, 32313, 32294, 32277, 32257,
32239, 32219, 32200, 32180, 32159,
32138, 32118, 32096, 32074, 32051,
32029, 32006, 31984, 31960, 31936,
31912, 31888, 31863, 31837, 31812,
31786, 31760, 31734, 31707, 31679,
31652, 31624, 31596, 31567, 31539,
31508, 31479, 31450, 31419, 31388,
31357, 31326, 31294, 31262, 31230,
31198, 31164, 31131, 31097, 31063,
31030, 30994, 30959, 30924, 30889,
30853, 30816, 30779, 30743, 30705,
30668, 30629, 30592, 30553, 30515,
30475, 30435, 30396, 30356, 30315,
30274, 30233, 30191, 30149, 30107,
30065, 30022, 29979, 29936, 29891,
29847, 29803, 29758, 29713, 29668,
29622, 29577, 29529, 29483, 29436,
29390, 29341, 29293, 29246, 29197,
29148, 29098, 29050, 29000, 28949,
28899, 28848, 28797, 28746, 28694,
28642, 28590, 28537, 28485, 28432,
28378, 28324, 28271, 28217, 28162,
28106, 28051, 27995, 27940, 27884,
27827, 27770, 27713, 27657, 27598,
27540, 27481, 27423, 27365, 27305,
27246, 27187, 27126, 27066, 27006,
26945, 26883, 26822, 26760, 26698,
26636, 26574, 26510, 26448, 26383,
26320, 26257, 26191, 26127, 26062,
25997, 25931, 25866, 25800, 25734,
25667, 25601, 25533, 25466, 25398,
25330, 25262, 25194, 25125, 25056,
24987, 24917, 24848, 24778, 24707,
24636, 24566, 24495, 24424, 24352,
24280, 24208, 24135, 24063, 23990,
23917, 23842, 23769, 23695, 23622,
23546, 23472, 23398, 23322, 23246,
23171, 23095, 23018, 22942, 22866,
22788, 22711, 22634, 22557, 22478,
22400, 22322, 22244, 22165, 22085,
22006, 21927, 21846, 21766, 21687,
21606, 21524, 21443, 21363, 21282,
21199, 21118, 21035, 20954, 20870,
20788, 20705, 20621, 20538, 20455,
20371, 20286, 20202, 20118, 20034,
19947, 19863, 19777, 19692, 19606,
19520, 19434, 19347, 19260, 19174,
19088, 18999, 18911, 18825, 18737,
18648, 18560, 18472, 18384, 18294,
18205, 18116, 18025, 17936, 17846,
17757, 17666, 17576, 17485, 17395,
17303, 17212, 17122, 17030, 16937,
16846, 16755, 16662, 16569, 16477,
16385, 16291, 16198, 16105, 16012,
15917, 15824, 15730, 15636, 15541,
15447, 15352, 15257, 15162, 15067,
14973, 14875, 14781, 14685, 14589,
14493, 14396, 14300, 14204, 14107,
14010, 13914, 13815, 13718, 13621,
13524, 13425, 13328, 13230, 13133,
13033, 12935, 12836, 12738, 12638,
12540, 12441, 12341, 12241, 12142,
12044, 11943, 11843, 11744, 11643,
11542, 11442, 11342, 11241, 11139,
11039, 10939, 10836, 10736, 10635,
10534, 10431, 10330, 10228, 10127,
10024, 9921, 9820, 9718, 9614,
9512, 9410, 9306, 9204, 9101,
8998, 8895, 8791, 8689, 8585,
8481, 8377, 8274, 8171, 8067,
7962, 7858, 7753, 7650, 7545,
7441, 7336, 7231, 7129, 7023,
6917, 6813, 6709, 6604, 6498,
6393, 6288, 6182, 6077, 5973,
5867, 5760, 5656, 5549, 5445,
5339, 5232, 5127, 5022, 4914,
4809, 4703, 4596, 4490, 4384,
4278, 4171, 4065, 3958, 3852,
3745, 3640, 3532, 3426, 3318,
3212, 3106, 2998, 2891, 2786,
2679, 2570, 2465, 2358, 2251,
2143, 2037, 1929, 1823, 1715,
1609, 1501, 1393, 1287, 1180,
1073, 964, 858, 751, 644,
535, 429, 322, 214, 107,
0, };
static const opus_val16 mdct_twiddles960[1800] = {
32767, 32767, 32767, 32766, 32765,
32763, 32761, 32759, 32756, 32753,
32750, 32746, 32742, 32738, 32733,
32728, 32722, 32717, 32710, 32704,
32697, 32690, 32682, 32674, 32666,
32657, 32648, 32639, 32629, 32619,
32609, 32598, 32587, 32576, 32564,
32552, 32539, 32526, 32513, 32500,
32486, 32472, 32457, 32442, 32427,
32411, 32395, 32379, 32362, 32345,
32328, 32310, 32292, 32274, 32255,
32236, 32217, 32197, 32177, 32157,
32136, 32115, 32093, 32071, 32049,
32027, 32004, 31981, 31957, 31933,
31909, 31884, 31859, 31834, 31809,
31783, 31756, 31730, 31703, 31676,
31648, 31620, 31592, 31563, 31534,
31505, 31475, 31445, 31415, 31384,
31353, 31322, 31290, 31258, 31226,
31193, 31160, 31127, 31093, 31059,
31025, 30990, 30955, 30920, 30884,
30848, 30812, 30775, 30738, 30701,
30663, 30625, 30587, 30548, 30509,
30470, 30430, 30390, 30350, 30309,
30269, 30227, 30186, 30144, 30102,
30059, 30016, 29973, 29930, 29886,
29842, 29797, 29752, 29707, 29662,
29616, 29570, 29524, 29477, 29430,
29383, 29335, 29287, 29239, 29190,
29142, 29092, 29043, 28993, 28943,
28892, 28842, 28791, 28739, 28688,
28636, 28583, 28531, 28478, 28425,
28371, 28317, 28263, 28209, 28154,
28099, 28044, 27988, 27932, 27876,
27820, 27763, 27706, 27648, 27591,
27533, 27474, 27416, 27357, 27298,
27238, 27178, 27118, 27058, 26997,
26936, 26875, 26814, 26752, 26690,
26628, 26565, 26502, 26439, 26375,
26312, 26247, 26183, 26119, 26054,
25988, 25923, 25857, 25791, 25725,
25658, 25592, 25524, 25457, 25389,
25322, 25253, 25185, 25116, 25047,
24978, 24908, 24838, 24768, 24698,
24627, 24557, 24485, 24414, 24342,
24270, 24198, 24126, 24053, 23980,
23907, 23834, 23760, 23686, 23612,
23537, 23462, 23387, 23312, 23237,
23161, 23085, 23009, 22932, 22856,
22779, 22701, 22624, 22546, 22468,
22390, 22312, 22233, 22154, 22075,
21996, 21916, 21836, 21756, 21676,
21595, 21515, 21434, 21352, 21271,
21189, 21107, 21025, 20943, 20860,
20777, 20694, 20611, 20528, 20444,
20360, 20276, 20192, 20107, 20022,
19937, 19852, 19767, 19681, 19595,
19509, 19423, 19336, 19250, 19163,
19076, 18988, 18901, 18813, 18725,
18637, 18549, 18460, 18372, 18283,
18194, 18104, 18015, 17925, 17835,
17745, 17655, 17565, 17474, 17383,
17292, 17201, 17110, 17018, 16927,
16835, 16743, 16650, 16558, 16465,
16372, 16279, 16186, 16093, 15999,
15906, 15812, 15718, 15624, 15529,
15435, 15340, 15245, 15150, 15055,
14960, 14864, 14769, 14673, 14577,
14481, 14385, 14288, 14192, 14095,
13998, 13901, 13804, 13706, 13609,
13511, 13414, 13316, 13218, 13119,
13021, 12923, 12824, 12725, 12626,
12527, 12428, 12329, 12230, 12130,
12030, 11930, 11831, 11730, 11630,
11530, 11430, 11329, 11228, 11128,
11027, 10926, 10824, 10723, 10622,
10520, 10419, 10317, 10215, 10113,
10011, 9909, 9807, 9704, 9602,
9499, 9397, 9294, 9191, 9088,
8985, 8882, 8778, 8675, 8572,
8468, 8364, 8261, 8157, 8053,
7949, 7845, 7741, 7637, 7532,
7428, 7323, 7219, 7114, 7009,
6905, 6800, 6695, 6590, 6485,
6380, 6274, 6169, 6064, 5958,
5853, 5747, 5642, 5536, 5430,
5325, 5219, 5113, 5007, 4901,
4795, 4689, 4583, 4476, 4370,
4264, 4157, 4051, 3945, 3838,
3732, 3625, 3518, 3412, 3305,
3198, 3092, 2985, 2878, 2771,
2664, 2558, 2451, 2344, 2237,
2130, 2023, 1916, 1809, 1702,
1594, 1487, 1380, 1273, 1166,
1059, 952, 844, 737, 630,
523, 416, 308, 201, 94,
-13, -121, -228, -335, -442,
-550, -657, -764, -871, -978,
-1086, -1193, -1300, -1407, -1514,
-1621, -1728, -1835, -1942, -2049,
-2157, -2263, -2370, -2477, -2584,
-2691, -2798, -2905, -3012, -3118,
-3225, -3332, -3439, -3545, -3652,
-3758, -3865, -3971, -4078, -4184,
-4290, -4397, -4503, -4609, -4715,
-4821, -4927, -5033, -5139, -5245,
-5351, -5457, -5562, -5668, -5774,
-5879, -5985, -6090, -6195, -6301,
-6406, -6511, -6616, -6721, -6826,
-6931, -7036, -7140, -7245, -7349,
-7454, -7558, -7663, -7767, -7871,
-7975, -8079, -8183, -8287, -8390,
-8494, -8597, -8701, -8804, -8907,
-9011, -9114, -9217, -9319, -9422,
-9525, -9627, -9730, -9832, -9934,
-10037, -10139, -10241, -10342, -10444,
-10546, -10647, -10748, -10850, -10951,
-11052, -11153, -11253, -11354, -11455,
-11555, -11655, -11756, -11856, -11955,
-12055, -12155, -12254, -12354, -12453,
-12552, -12651, -12750, -12849, -12947,
-13046, -13144, -13242, -13340, -13438,
-13536, -13633, -13731, -13828, -13925,
-14022, -14119, -14216, -14312, -14409,
-14505, -14601, -14697, -14793, -14888,
-14984, -15079, -15174, -15269, -15364,
-15459, -15553, -15647, -15741, -15835,
-15929, -16023, -16116, -16210, -16303,
-16396, -16488, -16581, -16673, -16766,
-16858, -16949, -17041, -17133, -17224,
-17315, -17406, -17497, -17587, -17678,
-17768, -17858, -17948, -18037, -18127,
-18216, -18305, -18394, -18483, -18571,
-18659, -18747, -18835, -18923, -19010,
-19098, -19185, -19271, -19358, -19444,
-19531, -19617, -19702, -19788, -19873,
-19959, -20043, -20128, -20213, -20297,
-20381, -20465, -20549, -20632, -20715,
-20798, -20881, -20963, -21046, -21128,
-21210, -21291, -21373, -21454, -21535,
-21616, -21696, -21776, -21856, -21936,
-22016, -22095, -22174, -22253, -22331,
-22410, -22488, -22566, -22643, -22721,
-22798, -22875, -22951, -23028, -23104,
-23180, -23256, -23331, -23406, -23481,
-23556, -23630, -23704, -23778, -23852,
-23925, -23998, -24071, -24144, -24216,
-24288, -24360, -24432, -24503, -24574,
-24645, -24716, -24786, -24856, -24926,
-24995, -25064, -25133, -25202, -25270,
-25339, -25406, -25474, -25541, -25608,
-25675, -25742, -25808, -25874, -25939,
-26005, -26070, -26135, -26199, -26264,
-26327, -26391, -26455, -26518, -26581,
-26643, -26705, -26767, -26829, -26891,
-26952, -27013, -27073, -27133, -27193,
-27253, -27312, -27372, -27430, -27489,
-27547, -27605, -27663, -27720, -27777,
-27834, -27890, -27946, -28002, -28058,
-28113, -28168, -28223, -28277, -28331,
-28385, -28438, -28491, -28544, -28596,
-28649, -28701, -28752, -28803, -28854,
-28905, -28955, -29006, -29055, -29105,
-29154, -29203, -29251, -29299, -29347,
-29395, -29442, -29489, -29535, -29582,
-29628, -29673, -29719, -29764, -29808,
-29853, -29897, -29941, -29984, -30027,
-30070, -30112, -30154, -30196, -30238,
-30279, -30320, -30360, -30400, -30440,
-30480, -30519, -30558, -30596, -30635,
-30672, -30710, -30747, -30784, -30821,
-30857, -30893, -30929, -30964, -30999,
-31033, -31068, -31102, -31135, -31168,
-31201, -31234, -31266, -31298, -31330,
-31361, -31392, -31422, -31453, -31483,
-31512, -31541, -31570, -31599, -31627,
-31655, -31682, -31710, -31737, -31763,
-31789, -31815, -31841, -31866, -31891,
-31915, -31939, -31963, -31986, -32010,
-32032, -32055, -32077, -32099, -32120,
-32141, -32162, -32182, -32202, -32222,
-32241, -32260, -32279, -32297, -32315,
-32333, -32350, -32367, -32383, -32399,
-32415, -32431, -32446, -32461, -32475,
-32489, -32503, -32517, -32530, -32542,
-32555, -32567, -32579, -32590, -32601,
-32612, -32622, -32632, -32641, -32651,
-32659, -32668, -32676, -32684, -32692,
-32699, -32706, -32712, -32718, -32724,
-32729, -32734, -32739, -32743, -32747,
-32751, -32754, -32757, -32760, -32762,
-32764, -32765, -32767, -32767, -32767,
32767, 32767, 32765, 32761, 32756,
32750, 32742, 32732, 32722, 32710,
32696, 32681, 32665, 32647, 32628,
32608, 32586, 32562, 32538, 32512,
32484, 32455, 32425, 32393, 32360,
32326, 32290, 32253, 32214, 32174,
32133, 32090, 32046, 32001, 31954,
31906, 31856, 31805, 31753, 31700,
31645, 31588, 31530, 31471, 31411,
31349, 31286, 31222, 31156, 31089,
31020, 30951, 30880, 30807, 30733,
30658, 30582, 30504, 30425, 30345,
30263, 30181, 30096, 30011, 29924,
29836, 29747, 29656, 29564, 29471,
29377, 29281, 29184, 29086, 28987,
28886, 28784, 28681, 28577, 28471,
28365, 28257, 28147, 28037, 27925,
27812, 27698, 27583, 27467, 27349,
27231, 27111, 26990, 26868, 26744,
26620, 26494, 26367, 26239, 26110,
25980, 25849, 25717, 25583, 25449,
25313, 25176, 25038, 24900, 24760,
24619, 24477, 24333, 24189, 24044,
23898, 23751, 23602, 23453, 23303,
23152, 22999, 22846, 22692, 22537,
22380, 22223, 22065, 21906, 21746,
21585, 21423, 21261, 21097, 20933,
20767, 20601, 20434, 20265, 20096,
19927, 19756, 19584, 19412, 19239,
19065, 18890, 18714, 18538, 18361,
18183, 18004, 17824, 17644, 17463,
17281, 17098, 16915, 16731, 16546,
16361, 16175, 15988, 15800, 15612,
15423, 15234, 15043, 14852, 14661,
14469, 14276, 14083, 13889, 13694,
13499, 13303, 13107, 12910, 12713,
12515, 12317, 12118, 11918, 11718,
11517, 11316, 11115, 10913, 10710,
10508, 10304, 10100, 9896, 9691,
9486, 9281, 9075, 8869, 8662,
8455, 8248, 8040, 7832, 7623,
7415, 7206, 6996, 6787, 6577,
6366, 6156, 5945, 5734, 5523,
5311, 5100, 4888, 4675, 4463,
4251, 4038, 3825, 3612, 3399,
3185, 2972, 2758, 2544, 2330,
2116, 1902, 1688, 1474, 1260,
1045, 831, 617, 402, 188,
-27, -241, -456, -670, -885,
-1099, -1313, -1528, -1742, -1956,
-2170, -2384, -2598, -2811, -3025,
-3239, -3452, -3665, -3878, -4091,
-4304, -4516, -4728, -4941, -5153,
-5364, -5576, -5787, -5998, -6209,
-6419, -6629, -6839, -7049, -7258,
-7467, -7676, -7884, -8092, -8300,
-8507, -8714, -8920, -9127, -9332,
-9538, -9743, -9947, -10151, -10355,
-10558, -10761, -10963, -11165, -11367,
-11568, -11768, -11968, -12167, -12366,
-12565, -12762, -12960, -13156, -13352,
-13548, -13743, -13937, -14131, -14324,
-14517, -14709, -14900, -15091, -15281,
-15470, -15659, -15847, -16035, -16221,
-16407, -16593, -16777, -16961, -17144,
-17326, -17508, -17689, -17869, -18049,
-18227, -18405, -18582, -18758, -18934,
-19108, -19282, -19455, -19627, -19799,
-19969, -20139, -20308, -20475, -20642,
-20809, -20974, -21138, -21301, -21464,
-21626, -21786, -21946, -22105, -22263,
-22420, -22575, -22730, -22884, -23037,
-23189, -23340, -23490, -23640, -23788,
-23935, -24080, -24225, -24369, -24512,
-24654, -24795, -24934, -25073, -25211,
-25347, -25482, -25617, -25750, -25882,
-26013, -26143, -26272, -26399, -26526,
-26651, -26775, -26898, -27020, -27141,
-27260, -27379, -27496, -27612, -27727,
-27841, -27953, -28065, -28175, -28284,
-28391, -28498, -28603, -28707, -28810,
-28911, -29012, -29111, -29209, -29305,
-29401, -29495, -29587, -29679, -29769,
-29858, -29946, -30032, -30118, -30201,
-30284, -30365, -30445, -30524, -30601,
-30677, -30752, -30825, -30897, -30968,
-31038, -31106, -31172, -31238, -31302,
-31365, -31426, -31486, -31545, -31602,
-31658, -31713, -31766, -31818, -31869,
-31918, -31966, -32012, -32058, -32101,
-32144, -32185, -32224, -32262, -32299,
-32335, -32369, -32401, -32433, -32463,
-32491, -32518, -32544, -32568, -32591,
-32613, -32633, -32652, -32669, -32685,
-32700, -32713, -32724, -32735, -32744,
-32751, -32757, -32762, -32766, -32767,
32767, 32764, 32755, 32741, 32720,
32694, 32663, 32626, 32583, 32535,
32481, 32421, 32356, 32286, 32209,
32128, 32041, 31948, 31850, 31747,
31638, 31523, 31403, 31278, 31148,
31012, 30871, 30724, 30572, 30415,
30253, 30086, 29913, 29736, 29553,
29365, 29172, 28974, 28771, 28564,
28351, 28134, 27911, 27684, 27452,
27216, 26975, 26729, 26478, 26223,
25964, 25700, 25432, 25159, 24882,
24601, 24315, 24026, 23732, 23434,
23133, 22827, 22517, 22204, 21886,
21565, 21240, 20912, 20580, 20244,
19905, 19563, 19217, 18868, 18516,
18160, 17802, 17440, 17075, 16708,
16338, 15964, 15588, 15210, 14829,
14445, 14059, 13670, 13279, 12886,
12490, 12093, 11693, 11291, 10888,
10482, 10075, 9666, 9255, 8843,
8429, 8014, 7597, 7180, 6760,
6340, 5919, 5496, 5073, 4649,
4224, 3798, 3372, 2945, 2517,
2090, 1661, 1233, 804, 375,
-54, -483, -911, -1340, -1768,
-2197, -2624, -3052, -3479, -3905,
-4330, -4755, -5179, -5602, -6024,
-6445, -6865, -7284, -7702, -8118,
-8533, -8946, -9358, -9768, -10177,
-10584, -10989, -11392, -11793, -12192,
-12589, -12984, -13377, -13767, -14155,
-14541, -14924, -15305, -15683, -16058,
-16430, -16800, -17167, -17531, -17892,
-18249, -18604, -18956, -19304, -19649,
-19990, -20329, -20663, -20994, -21322,
-21646, -21966, -22282, -22595, -22904,
-23208, -23509, -23806, -24099, -24387,
-24672, -24952, -25228, -25499, -25766,
-26029, -26288, -26541, -26791, -27035,
-27275, -27511, -27741, -27967, -28188,
-28405, -28616, -28823, -29024, -29221,
-29412, -29599, -29780, -29957, -30128,
-30294, -30455, -30611, -30761, -30906,
-31046, -31181, -31310, -31434, -31552,
-31665, -31773, -31875, -31972, -32063,
-32149, -32229, -32304, -32373, -32437,
-32495, -32547, -32594, -32635, -32671,
-32701, -32726, -32745, -32758, -32766,
32767, 32754, 32717, 32658, 32577,
32473, 32348, 32200, 32029, 31837,
31624, 31388, 31131, 30853, 30553,
30232, 29891, 29530, 29148, 28746,
28324, 27883, 27423, 26944, 26447,
25931, 25398, 24847, 24279, 23695,
23095, 22478, 21846, 21199, 20538,
19863, 19174, 18472, 17757, 17030,
16291, 15541, 14781, 14010, 13230,
12441, 11643, 10837, 10024, 9204,
8377, 7545, 6708, 5866, 5020,
4171, 3319, 2464, 1608, 751,
-107, -965, -1822, -2678, -3532,
-4383, -5232, -6077, -6918, -7754,
-8585, -9409, -10228, -11039, -11843,
-12639, -13426, -14204, -14972, -15730,
-16477, -17213, -17937, -18648, -19347,
-20033, -20705, -21363, -22006, -22634,
-23246, -23843, -24423, -24986, -25533,
-26062, -26573, -27066, -27540, -27995,
-28431, -28848, -29245, -29622, -29979,
-30315, -30630, -30924, -31197, -31449,
-31679, -31887, -32074, -32239, -32381,
-32501, -32600, -32675, -32729, -32759,
};
#endif
static const CELTMode mode48000_960_120 = {

View File

@ -0,0 +1,388 @@
/* The contents of this file was automatically generated by
* dump_mode_arm_ne10.c with arguments: 48000 960
* It contains static definitions for some pre-defined modes. */
#include <NE10_init.h>
#ifndef NE10_FFT_PARAMS48000_960
#define NE10_FFT_PARAMS48000_960
static const ne10_int32_t ne10_factors_480[64] = {
4, 40, 4, 30, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, };
static const ne10_int32_t ne10_factors_240[64] = {
3, 20, 4, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, };
static const ne10_int32_t ne10_factors_120[64] = {
3, 10, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, };
static const ne10_int32_t ne10_factors_60[64] = {
2, 5, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, };
static const ne10_fft_cpx_int32_t ne10_twiddles_480[480] = {
{0,0}, {2147483647,0}, {2147483647,0},
{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394},
{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496},
{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096},
{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152},
{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313},
{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424},
{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496},
{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268},
{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785},
{2147483647,0}, {2144540595,-112390613}, {2135719506,-224473172},
{2121044558,-335940465}, {2100555974,-446486968}, {2074309912,-555809682},
{2042378310,-663608960}, {2004848691,-769589332}, {1961823921,-873460313},
{1913421927,-974937199}, {1859775377,-1073741851}, {1801031311,-1169603450},
{1737350743,-1262259248}, {1668908218,-1351455280}, {1595891331,-1436947067},
{1518500216,-1518500282}, {1436946998,-1595891394}, {1351455207,-1668908277},
{1262259172,-1737350799}, {1169603371,-1801031362}, {1073741769,-1859775424},
{974937230,-1913421912}, {873460227,-1961823959}, {769589125,-2004848771},
{663608871,-2042378339}, {555809715,-2074309903}, {446486876,-2100555994},
{335940246,-2121044593}, {224473078,-2135719516}, {112390647,-2144540593},
{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968},
{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851},
{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394},
{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959},
{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516},
{-94,-2147483647}, {-224473265,-2135719496}, {-446487060,-2100555955},
{-663609049,-2042378281}, {-873460398,-1961823883}, {-1073741932,-1859775330},
{-1262259116,-1737350839}, {-1436947137,-1595891268}, {-1595891628,-1436946738},
{-1737350854,-1262259096}, {-1859775343,-1073741910}, {-1961823997,-873460141},
{-2042378447,-663608538}, {-2100556013,-446486785}, {-2135719499,-224473240},
{2147483647,0}, {2121044558,-335940465}, {2042378310,-663608960},
{1913421927,-974937199}, {1737350743,-1262259248}, {1518500216,-1518500282},
{1262259172,-1737350799}, {974937230,-1913421912}, {663608871,-2042378339},
{335940246,-2121044593}, {-94,-2147483647}, {-335940431,-2121044564},
{-663609049,-2042378281}, {-974937397,-1913421827}, {-1262259116,-1737350839},
{-1518500258,-1518500240}, {-1737350854,-1262259096}, {-1913422071,-974936918},
{-2042378447,-663608538}, {-2121044568,-335940406}, {-2147483647,188},
{-2121044509,335940777}, {-2042378331,663608895}, {-1913421900,974937252},
{-1737350633,1262259400}, {-1518499993,1518500506}, {-1262258813,1737351059},
{-974936606,1913422229}, {-663609179,2042378239}, {-335940566,2121044542},
{2147483647,0}, {2147299667,-28109693}, {2146747758,-56214570},
{2145828015,-84309815}, {2144540595,-112390613}, {2142885719,-140452154},
{2140863671,-168489630}, {2138474797,-196498235}, {2135719506,-224473172},
{2132598271,-252409646}, {2129111626,-280302871}, {2125260168,-308148068},
{2121044558,-335940465}, {2116465518,-363675300}, {2111523833,-391347822},
{2106220349,-418953288}, {2100555974,-446486968}, {2094531681,-473944146},
{2088148500,-501320115}, {2081407525,-528610186}, {2074309912,-555809682},
{2066856885,-582913912}, {2059049696,-609918325}, {2050889698,-636818231},
{2042378310,-663608960}, {2033516972,-690285983}, {2024307180,-716844791},
{2014750533,-743280770}, {2004848691,-769589332}, {1994603329,-795766029},
{1984016179,-821806435}, {1973089077,-847706028}, {1961823921,-873460313},
{1950222618,-899064934}, {1938287127,-924515564}, {1926019520,-949807783},
{1913421927,-974937199}, {1900496481,-999899565}, {1887245364,-1024690661},
{1873670877,-1049306180}, {1859775377,-1073741851}, {1845561215,-1097993541},
{1831030826,-1122057097}, {1816186632,-1145928502}, {1801031311,-1169603450},
{1785567394,-1193077993}, {1769797456,-1216348214}, {1753724345,-1239409914},
{1737350743,-1262259248}, {1720679456,-1284892300}, {1703713340,-1307305194},
{1686455222,-1329494189}, {1668908218,-1351455280}, {1651075255,-1373184807},
{1632959307,-1394679144}, {1614563642,-1415934412}, {1595891331,-1436947067},
{1576945572,-1457713510}, {1557729613,-1478230181}, {1538246655,-1498493658},
{1518500216,-1518500282}, {1498493590,-1538246721}, {1478230113,-1557729677},
{1457713441,-1576945636}, {1436946998,-1595891394}, {1415934341,-1614563704},
{1394679073,-1632959368}, {1373184735,-1651075315}, {1351455207,-1668908277},
{1329494115,-1686455280}, {1307305120,-1703713397}, {1284892225,-1720679512},
{1262259172,-1737350799}, {1239409837,-1753724400}, {1216348136,-1769797510},
{1193077915,-1785567446}, {1169603371,-1801031362}, {1145928423,-1816186682},
{1122057017,-1831030875}, {1097993571,-1845561197}, {1073741769,-1859775424},
{1049305987,-1873670985}, {1024690635,-1887245378}, {999899482,-1900496524},
{974937230,-1913421912}, {949807699,-1926019561}, {924515422,-1938287195},
{899064965,-1950222603}, {873460227,-1961823959}, {847705824,-1973089164},
{821806407,-1984016190}, {795765941,-1994603364}, {769589125,-2004848771},
{743280682,-2014750566}, {716844642,-2024307233}, {690286016,-2033516961},
{663608871,-2042378339}, {636818019,-2050889764}, {609918296,-2059049705},
{582913822,-2066856911}, {555809715,-2074309903}, {528610126,-2081407540},
{501319962,-2088148536}, {473944148,-2094531680}, {446486876,-2100555994},
{418953102,-2106220386}, {391347792,-2111523838}, {363675176,-2116465540},
{335940246,-2121044593}, {308148006,-2125260177}, {280302715,-2129111646},
{252409648,-2132598271}, {224473078,-2135719516}, {196498046,-2138474814},
{168489600,-2140863674}, {140452029,-2142885728}, {112390647,-2144540593},
{84309753,-2145828017}, {56214412,-2146747762}, {28109695,-2147299667},
{2147483647,0}, {2146747758,-56214570}, {2144540595,-112390613},
{2140863671,-168489630}, {2135719506,-224473172}, {2129111626,-280302871},
{2121044558,-335940465}, {2111523833,-391347822}, {2100555974,-446486968},
{2088148500,-501320115}, {2074309912,-555809682}, {2059049696,-609918325},
{2042378310,-663608960}, {2024307180,-716844791}, {2004848691,-769589332},
{1984016179,-821806435}, {1961823921,-873460313}, {1938287127,-924515564},
{1913421927,-974937199}, {1887245364,-1024690661}, {1859775377,-1073741851},
{1831030826,-1122057097}, {1801031311,-1169603450}, {1769797456,-1216348214},
{1737350743,-1262259248}, {1703713340,-1307305194}, {1668908218,-1351455280},
{1632959307,-1394679144}, {1595891331,-1436947067}, {1557729613,-1478230181},
{1518500216,-1518500282}, {1478230113,-1557729677}, {1436946998,-1595891394},
{1394679073,-1632959368}, {1351455207,-1668908277}, {1307305120,-1703713397},
{1262259172,-1737350799}, {1216348136,-1769797510}, {1169603371,-1801031362},
{1122057017,-1831030875}, {1073741769,-1859775424}, {1024690635,-1887245378},
{974937230,-1913421912}, {924515422,-1938287195}, {873460227,-1961823959},
{821806407,-1984016190}, {769589125,-2004848771}, {716844642,-2024307233},
{663608871,-2042378339}, {609918296,-2059049705}, {555809715,-2074309903},
{501319962,-2088148536}, {446486876,-2100555994}, {391347792,-2111523838},
{335940246,-2121044593}, {280302715,-2129111646}, {224473078,-2135719516},
{168489600,-2140863674}, {112390647,-2144540593}, {56214412,-2146747762},
{-94,-2147483647}, {-56214600,-2146747757}, {-112390835,-2144540584},
{-168489787,-2140863659}, {-224473265,-2135719496}, {-280302901,-2129111622},
{-335940431,-2121044564}, {-391347977,-2111523804}, {-446487060,-2100555955},
{-501320144,-2088148493}, {-555809896,-2074309855}, {-609918476,-2059049651},
{-663609049,-2042378281}, {-716844819,-2024307170}, {-769589300,-2004848703},
{-821806581,-1984016118}, {-873460398,-1961823883}, {-924515591,-1938287114},
{-974937397,-1913421827}, {-1024690575,-1887245411}, {-1073741932,-1859775330},
{-1122057395,-1831030643}, {-1169603421,-1801031330}, {-1216348291,-1769797403},
{-1262259116,-1737350839}, {-1307305268,-1703713283}, {-1351455453,-1668908078},
{-1394679021,-1632959413}, {-1436947137,-1595891268}, {-1478230435,-1557729372},
{-1518500258,-1518500240}, {-1557729742,-1478230045}, {-1595891628,-1436946738},
{-1632959429,-1394679001}, {-1668908417,-1351455035}, {-1703713298,-1307305248},
{-1737350854,-1262259096}, {-1769797708,-1216347848}, {-1801031344,-1169603400},
{-1831030924,-1122056937}, {-1859775343,-1073741910}, {-1887245423,-1024690552},
{-1913422071,-974936918}, {-1938287125,-924515568}, {-1961823997,-873460141},
{-1984016324,-821806084}, {-2004848713,-769589276}, {-2024307264,-716844553},
{-2042378447,-663608538}, {-2059049731,-609918206}, {-2074309994,-555809377},
{-2088148499,-501320119}, {-2100556013,-446486785}, {-2111523902,-391347448},
{-2121044568,-335940406}, {-2129111659,-280302621}, {-2135719499,-224473240},
{-2140863681,-168489506}, {-2144540612,-112390298}, {-2146747758,-56214574},
{2147483647,0}, {2145828015,-84309815}, {2140863671,-168489630},
{2132598271,-252409646}, {2121044558,-335940465}, {2106220349,-418953288},
{2088148500,-501320115}, {2066856885,-582913912}, {2042378310,-663608960},
{2014750533,-743280770}, {1984016179,-821806435}, {1950222618,-899064934},
{1913421927,-974937199}, {1873670877,-1049306180}, {1831030826,-1122057097},
{1785567394,-1193077993}, {1737350743,-1262259248}, {1686455222,-1329494189},
{1632959307,-1394679144}, {1576945572,-1457713510}, {1518500216,-1518500282},
{1457713441,-1576945636}, {1394679073,-1632959368}, {1329494115,-1686455280},
{1262259172,-1737350799}, {1193077915,-1785567446}, {1122057017,-1831030875},
{1049305987,-1873670985}, {974937230,-1913421912}, {899064965,-1950222603},
{821806407,-1984016190}, {743280682,-2014750566}, {663608871,-2042378339},
{582913822,-2066856911}, {501319962,-2088148536}, {418953102,-2106220386},
{335940246,-2121044593}, {252409648,-2132598271}, {168489600,-2140863674},
{84309753,-2145828017}, {-94,-2147483647}, {-84309940,-2145828010},
{-168489787,-2140863659}, {-252409834,-2132598249}, {-335940431,-2121044564},
{-418953286,-2106220349}, {-501320144,-2088148493}, {-582914003,-2066856860},
{-663609049,-2042378281}, {-743280858,-2014750501}, {-821806581,-1984016118},
{-899065136,-1950222525}, {-974937397,-1913421827}, {-1049306374,-1873670768},
{-1122057395,-1831030643}, {-1193078284,-1785567199}, {-1262259116,-1737350839},
{-1329494061,-1686455323}, {-1394679021,-1632959413}, {-1457713485,-1576945595},
{-1518500258,-1518500240}, {-1576945613,-1457713466}, {-1632959429,-1394679001},
{-1686455338,-1329494041}, {-1737350854,-1262259096}, {-1785567498,-1193077837},
{-1831030924,-1122056937}, {-1873671031,-1049305905}, {-1913422071,-974936918},
{-1950222750,-899064648}, {-1984016324,-821806084}, {-2014750687,-743280354},
{-2042378447,-663608538}, {-2066856867,-582913978}, {-2088148499,-501320119},
{-2106220354,-418953261}, {-2121044568,-335940406}, {-2132598282,-252409555},
{-2140863681,-168489506}, {-2145828021,-84309659}, {-2147483647,188},
{-2145828006,84310034}, {-2140863651,168489881}, {-2132598237,252409928},
{-2121044509,335940777}, {-2106220281,418953629}, {-2088148411,501320484},
{-2066856765,582914339}, {-2042378331,663608895}, {-2014750557,743280706},
{-1984016181,821806431}, {-1950222593,899064989}, {-1913421900,974937252},
{-1873670848,1049306232}, {-1831030728,1122057257}, {-1785567289,1193078149},
{-1737350633,1262259400}, {-1686455106,1329494336}, {-1632959185,1394679287},
{-1576945358,1457713742}, {-1518499993,1518500506}, {-1457713209,1576945850},
{-1394678735,1632959656}, {-1329493766,1686455555}, {-1262258813,1737351059},
{-1193077546,1785567692}, {-1122056638,1831031107}, {-1049305599,1873671202},
{-974936606,1913422229}, {-899064330,1950222896}, {-821805761,1984016458},
{-743280025,2014750808}, {-663609179,2042378239}, {-582914134,2066856823},
{-501320277,2088148461}, {-418953420,2106220322}, {-335940566,2121044542},
{-252409716,2132598263}, {-168489668,2140863668}, {-84309821,2145828015},
};
static const ne10_fft_cpx_int32_t ne10_twiddles_240[240] = {
{0,0}, {2147483647,0}, {2147483647,0},
{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394},
{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496},
{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096},
{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152},
{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968},
{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851},
{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394},
{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959},
{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516},
{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313},
{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424},
{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496},
{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268},
{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785},
{2147483647,0}, {2042378310,-663608960}, {1737350743,-1262259248},
{1262259172,-1737350799}, {663608871,-2042378339}, {-94,-2147483647},
{-663609049,-2042378281}, {-1262259116,-1737350839}, {-1737350854,-1262259096},
{-2042378447,-663608538}, {-2147483647,188}, {-2042378331,663608895},
{-1737350633,1262259400}, {-1262258813,1737351059}, {-663609179,2042378239},
{2147483647,0}, {2146747758,-56214570}, {2144540595,-112390613},
{2140863671,-168489630}, {2135719506,-224473172}, {2129111626,-280302871},
{2121044558,-335940465}, {2111523833,-391347822}, {2100555974,-446486968},
{2088148500,-501320115}, {2074309912,-555809682}, {2059049696,-609918325},
{2042378310,-663608960}, {2024307180,-716844791}, {2004848691,-769589332},
{1984016179,-821806435}, {1961823921,-873460313}, {1938287127,-924515564},
{1913421927,-974937199}, {1887245364,-1024690661}, {1859775377,-1073741851},
{1831030826,-1122057097}, {1801031311,-1169603450}, {1769797456,-1216348214},
{1737350743,-1262259248}, {1703713340,-1307305194}, {1668908218,-1351455280},
{1632959307,-1394679144}, {1595891331,-1436947067}, {1557729613,-1478230181},
{1518500216,-1518500282}, {1478230113,-1557729677}, {1436946998,-1595891394},
{1394679073,-1632959368}, {1351455207,-1668908277}, {1307305120,-1703713397},
{1262259172,-1737350799}, {1216348136,-1769797510}, {1169603371,-1801031362},
{1122057017,-1831030875}, {1073741769,-1859775424}, {1024690635,-1887245378},
{974937230,-1913421912}, {924515422,-1938287195}, {873460227,-1961823959},
{821806407,-1984016190}, {769589125,-2004848771}, {716844642,-2024307233},
{663608871,-2042378339}, {609918296,-2059049705}, {555809715,-2074309903},
{501319962,-2088148536}, {446486876,-2100555994}, {391347792,-2111523838},
{335940246,-2121044593}, {280302715,-2129111646}, {224473078,-2135719516},
{168489600,-2140863674}, {112390647,-2144540593}, {56214412,-2146747762},
{2147483647,0}, {2144540595,-112390613}, {2135719506,-224473172},
{2121044558,-335940465}, {2100555974,-446486968}, {2074309912,-555809682},
{2042378310,-663608960}, {2004848691,-769589332}, {1961823921,-873460313},
{1913421927,-974937199}, {1859775377,-1073741851}, {1801031311,-1169603450},
{1737350743,-1262259248}, {1668908218,-1351455280}, {1595891331,-1436947067},
{1518500216,-1518500282}, {1436946998,-1595891394}, {1351455207,-1668908277},
{1262259172,-1737350799}, {1169603371,-1801031362}, {1073741769,-1859775424},
{974937230,-1913421912}, {873460227,-1961823959}, {769589125,-2004848771},
{663608871,-2042378339}, {555809715,-2074309903}, {446486876,-2100555994},
{335940246,-2121044593}, {224473078,-2135719516}, {112390647,-2144540593},
{-94,-2147483647}, {-112390835,-2144540584}, {-224473265,-2135719496},
{-335940431,-2121044564}, {-446487060,-2100555955}, {-555809896,-2074309855},
{-663609049,-2042378281}, {-769589300,-2004848703}, {-873460398,-1961823883},
{-974937397,-1913421827}, {-1073741932,-1859775330}, {-1169603421,-1801031330},
{-1262259116,-1737350839}, {-1351455453,-1668908078}, {-1436947137,-1595891268},
{-1518500258,-1518500240}, {-1595891628,-1436946738}, {-1668908417,-1351455035},
{-1737350854,-1262259096}, {-1801031344,-1169603400}, {-1859775343,-1073741910},
{-1913422071,-974936918}, {-1961823997,-873460141}, {-2004848713,-769589276},
{-2042378447,-663608538}, {-2074309994,-555809377}, {-2100556013,-446486785},
{-2121044568,-335940406}, {-2135719499,-224473240}, {-2144540612,-112390298},
{2147483647,0}, {2140863671,-168489630}, {2121044558,-335940465},
{2088148500,-501320115}, {2042378310,-663608960}, {1984016179,-821806435},
{1913421927,-974937199}, {1831030826,-1122057097}, {1737350743,-1262259248},
{1632959307,-1394679144}, {1518500216,-1518500282}, {1394679073,-1632959368},
{1262259172,-1737350799}, {1122057017,-1831030875}, {974937230,-1913421912},
{821806407,-1984016190}, {663608871,-2042378339}, {501319962,-2088148536},
{335940246,-2121044593}, {168489600,-2140863674}, {-94,-2147483647},
{-168489787,-2140863659}, {-335940431,-2121044564}, {-501320144,-2088148493},
{-663609049,-2042378281}, {-821806581,-1984016118}, {-974937397,-1913421827},
{-1122057395,-1831030643}, {-1262259116,-1737350839}, {-1394679021,-1632959413},
{-1518500258,-1518500240}, {-1632959429,-1394679001}, {-1737350854,-1262259096},
{-1831030924,-1122056937}, {-1913422071,-974936918}, {-1984016324,-821806084},
{-2042378447,-663608538}, {-2088148499,-501320119}, {-2121044568,-335940406},
{-2140863681,-168489506}, {-2147483647,188}, {-2140863651,168489881},
{-2121044509,335940777}, {-2088148411,501320484}, {-2042378331,663608895},
{-1984016181,821806431}, {-1913421900,974937252}, {-1831030728,1122057257},
{-1737350633,1262259400}, {-1632959185,1394679287}, {-1518499993,1518500506},
{-1394678735,1632959656}, {-1262258813,1737351059}, {-1122056638,1831031107},
{-974936606,1913422229}, {-821805761,1984016458}, {-663609179,2042378239},
{-501320277,2088148461}, {-335940566,2121044542}, {-168489668,2140863668},
};
static const ne10_fft_cpx_int32_t ne10_twiddles_120[120] = {
{0,0}, {2147483647,0}, {2147483647,0},
{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394},
{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496},
{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096},
{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152},
{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313},
{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424},
{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496},
{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268},
{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785},
{2147483647,0}, {2144540595,-112390613}, {2135719506,-224473172},
{2121044558,-335940465}, {2100555974,-446486968}, {2074309912,-555809682},
{2042378310,-663608960}, {2004848691,-769589332}, {1961823921,-873460313},
{1913421927,-974937199}, {1859775377,-1073741851}, {1801031311,-1169603450},
{1737350743,-1262259248}, {1668908218,-1351455280}, {1595891331,-1436947067},
{1518500216,-1518500282}, {1436946998,-1595891394}, {1351455207,-1668908277},
{1262259172,-1737350799}, {1169603371,-1801031362}, {1073741769,-1859775424},
{974937230,-1913421912}, {873460227,-1961823959}, {769589125,-2004848771},
{663608871,-2042378339}, {555809715,-2074309903}, {446486876,-2100555994},
{335940246,-2121044593}, {224473078,-2135719516}, {112390647,-2144540593},
{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968},
{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851},
{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394},
{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959},
{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516},
{-94,-2147483647}, {-224473265,-2135719496}, {-446487060,-2100555955},
{-663609049,-2042378281}, {-873460398,-1961823883}, {-1073741932,-1859775330},
{-1262259116,-1737350839}, {-1436947137,-1595891268}, {-1595891628,-1436946738},
{-1737350854,-1262259096}, {-1859775343,-1073741910}, {-1961823997,-873460141},
{-2042378447,-663608538}, {-2100556013,-446486785}, {-2135719499,-224473240},
{2147483647,0}, {2121044558,-335940465}, {2042378310,-663608960},
{1913421927,-974937199}, {1737350743,-1262259248}, {1518500216,-1518500282},
{1262259172,-1737350799}, {974937230,-1913421912}, {663608871,-2042378339},
{335940246,-2121044593}, {-94,-2147483647}, {-335940431,-2121044564},
{-663609049,-2042378281}, {-974937397,-1913421827}, {-1262259116,-1737350839},
{-1518500258,-1518500240}, {-1737350854,-1262259096}, {-1913422071,-974936918},
{-2042378447,-663608538}, {-2121044568,-335940406}, {-2147483647,188},
{-2121044509,335940777}, {-2042378331,663608895}, {-1913421900,974937252},
{-1737350633,1262259400}, {-1518499993,1518500506}, {-1262258813,1737351059},
{-974936606,1913422229}, {-663609179,2042378239}, {-335940566,2121044542},
};
static const ne10_fft_cpx_int32_t ne10_twiddles_60[60] = {
{0,0}, {2147483647,0}, {2147483647,0},
{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394},
{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496},
{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096},
{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152},
{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968},
{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851},
{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394},
{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959},
{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516},
{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313},
{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424},
{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496},
{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268},
{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785},
{2147483647,0}, {2042378310,-663608960}, {1737350743,-1262259248},
{1262259172,-1737350799}, {663608871,-2042378339}, {-94,-2147483647},
{-663609049,-2042378281}, {-1262259116,-1737350839}, {-1737350854,-1262259096},
{-2042378447,-663608538}, {-2147483647,188}, {-2042378331,663608895},
{-1737350633,1262259400}, {-1262258813,1737351059}, {-663609179,2042378239},
};
static const ne10_fft_state_int32_t ne10_fft_state_int32_t_480 = {
120,
(ne10_int32_t *)ne10_factors_480,
(ne10_fft_cpx_int32_t *)ne10_twiddles_480,
NULL,
(ne10_fft_cpx_int32_t *)&ne10_twiddles_480[120],
};
static const arch_fft_state cfg_arch_480 = {
1,
(void *)&ne10_fft_state_int32_t_480,
};
static const ne10_fft_state_int32_t ne10_fft_state_int32_t_240 = {
60,
(ne10_int32_t *)ne10_factors_240,
(ne10_fft_cpx_int32_t *)ne10_twiddles_240,
NULL,
(ne10_fft_cpx_int32_t *)&ne10_twiddles_240[60],
};
static const arch_fft_state cfg_arch_240 = {
1,
(void *)&ne10_fft_state_int32_t_240,
};
static const ne10_fft_state_int32_t ne10_fft_state_int32_t_120 = {
30,
(ne10_int32_t *)ne10_factors_120,
(ne10_fft_cpx_int32_t *)ne10_twiddles_120,
NULL,
(ne10_fft_cpx_int32_t *)&ne10_twiddles_120[30],
};
static const arch_fft_state cfg_arch_120 = {
1,
(void *)&ne10_fft_state_int32_t_120,
};
static const ne10_fft_state_int32_t ne10_fft_state_int32_t_60 = {
15,
(ne10_int32_t *)ne10_factors_60,
(ne10_fft_cpx_int32_t *)ne10_twiddles_60,
NULL,
(ne10_fft_cpx_int32_t *)&ne10_twiddles_60[15],
};
static const arch_fft_state cfg_arch_60 = {
1,
(void *)&ne10_fft_state_int32_t_60,
};
#endif /* end NE10_FFT_PARAMS48000_960 */

View File

@ -4,6 +4,11 @@
#include "modes.h"
#include "rate.h"
#ifdef HAVE_ARM_NE10
#define OVERRIDE_FFT 1
#include "static_modes_float_arm_ne10.h"
#endif
#ifndef DEF_WINDOW120
#define DEF_WINDOW120
static const opus_val16 window120[120] = {
@ -341,84 +346,84 @@ static const kiss_twiddle_cpx fft_twiddles48000_960[480] = {
#ifndef FFT_BITREV480
#define FFT_BITREV480
static const opus_int16 fft_bitrev480[480] = {
0, 120, 240, 360, 30, 150, 270, 390, 60, 180, 300, 420, 90, 210, 330,
450, 15, 135, 255, 375, 45, 165, 285, 405, 75, 195, 315, 435, 105, 225,
345, 465, 5, 125, 245, 365, 35, 155, 275, 395, 65, 185, 305, 425, 95,
215, 335, 455, 20, 140, 260, 380, 50, 170, 290, 410, 80, 200, 320, 440,
110, 230, 350, 470, 10, 130, 250, 370, 40, 160, 280, 400, 70, 190, 310,
430, 100, 220, 340, 460, 25, 145, 265, 385, 55, 175, 295, 415, 85, 205,
325, 445, 115, 235, 355, 475, 1, 121, 241, 361, 31, 151, 271, 391, 61,
181, 301, 421, 91, 211, 331, 451, 16, 136, 256, 376, 46, 166, 286, 406,
76, 196, 316, 436, 106, 226, 346, 466, 6, 126, 246, 366, 36, 156, 276,
396, 66, 186, 306, 426, 96, 216, 336, 456, 21, 141, 261, 381, 51, 171,
291, 411, 81, 201, 321, 441, 111, 231, 351, 471, 11, 131, 251, 371, 41,
161, 281, 401, 71, 191, 311, 431, 101, 221, 341, 461, 26, 146, 266, 386,
56, 176, 296, 416, 86, 206, 326, 446, 116, 236, 356, 476, 2, 122, 242,
362, 32, 152, 272, 392, 62, 182, 302, 422, 92, 212, 332, 452, 17, 137,
257, 377, 47, 167, 287, 407, 77, 197, 317, 437, 107, 227, 347, 467, 7,
127, 247, 367, 37, 157, 277, 397, 67, 187, 307, 427, 97, 217, 337, 457,
22, 142, 262, 382, 52, 172, 292, 412, 82, 202, 322, 442, 112, 232, 352,
472, 12, 132, 252, 372, 42, 162, 282, 402, 72, 192, 312, 432, 102, 222,
342, 462, 27, 147, 267, 387, 57, 177, 297, 417, 87, 207, 327, 447, 117,
237, 357, 477, 3, 123, 243, 363, 33, 153, 273, 393, 63, 183, 303, 423,
93, 213, 333, 453, 18, 138, 258, 378, 48, 168, 288, 408, 78, 198, 318,
438, 108, 228, 348, 468, 8, 128, 248, 368, 38, 158, 278, 398, 68, 188,
308, 428, 98, 218, 338, 458, 23, 143, 263, 383, 53, 173, 293, 413, 83,
203, 323, 443, 113, 233, 353, 473, 13, 133, 253, 373, 43, 163, 283, 403,
73, 193, 313, 433, 103, 223, 343, 463, 28, 148, 268, 388, 58, 178, 298,
418, 88, 208, 328, 448, 118, 238, 358, 478, 4, 124, 244, 364, 34, 154,
274, 394, 64, 184, 304, 424, 94, 214, 334, 454, 19, 139, 259, 379, 49,
169, 289, 409, 79, 199, 319, 439, 109, 229, 349, 469, 9, 129, 249, 369,
39, 159, 279, 399, 69, 189, 309, 429, 99, 219, 339, 459, 24, 144, 264,
384, 54, 174, 294, 414, 84, 204, 324, 444, 114, 234, 354, 474, 14, 134,
254, 374, 44, 164, 284, 404, 74, 194, 314, 434, 104, 224, 344, 464, 29,
149, 269, 389, 59, 179, 299, 419, 89, 209, 329, 449, 119, 239, 359, 479,
0, 96, 192, 288, 384, 32, 128, 224, 320, 416, 64, 160, 256, 352, 448,
8, 104, 200, 296, 392, 40, 136, 232, 328, 424, 72, 168, 264, 360, 456,
16, 112, 208, 304, 400, 48, 144, 240, 336, 432, 80, 176, 272, 368, 464,
24, 120, 216, 312, 408, 56, 152, 248, 344, 440, 88, 184, 280, 376, 472,
4, 100, 196, 292, 388, 36, 132, 228, 324, 420, 68, 164, 260, 356, 452,
12, 108, 204, 300, 396, 44, 140, 236, 332, 428, 76, 172, 268, 364, 460,
20, 116, 212, 308, 404, 52, 148, 244, 340, 436, 84, 180, 276, 372, 468,
28, 124, 220, 316, 412, 60, 156, 252, 348, 444, 92, 188, 284, 380, 476,
1, 97, 193, 289, 385, 33, 129, 225, 321, 417, 65, 161, 257, 353, 449,
9, 105, 201, 297, 393, 41, 137, 233, 329, 425, 73, 169, 265, 361, 457,
17, 113, 209, 305, 401, 49, 145, 241, 337, 433, 81, 177, 273, 369, 465,
25, 121, 217, 313, 409, 57, 153, 249, 345, 441, 89, 185, 281, 377, 473,
5, 101, 197, 293, 389, 37, 133, 229, 325, 421, 69, 165, 261, 357, 453,
13, 109, 205, 301, 397, 45, 141, 237, 333, 429, 77, 173, 269, 365, 461,
21, 117, 213, 309, 405, 53, 149, 245, 341, 437, 85, 181, 277, 373, 469,
29, 125, 221, 317, 413, 61, 157, 253, 349, 445, 93, 189, 285, 381, 477,
2, 98, 194, 290, 386, 34, 130, 226, 322, 418, 66, 162, 258, 354, 450,
10, 106, 202, 298, 394, 42, 138, 234, 330, 426, 74, 170, 266, 362, 458,
18, 114, 210, 306, 402, 50, 146, 242, 338, 434, 82, 178, 274, 370, 466,
26, 122, 218, 314, 410, 58, 154, 250, 346, 442, 90, 186, 282, 378, 474,
6, 102, 198, 294, 390, 38, 134, 230, 326, 422, 70, 166, 262, 358, 454,
14, 110, 206, 302, 398, 46, 142, 238, 334, 430, 78, 174, 270, 366, 462,
22, 118, 214, 310, 406, 54, 150, 246, 342, 438, 86, 182, 278, 374, 470,
30, 126, 222, 318, 414, 62, 158, 254, 350, 446, 94, 190, 286, 382, 478,
3, 99, 195, 291, 387, 35, 131, 227, 323, 419, 67, 163, 259, 355, 451,
11, 107, 203, 299, 395, 43, 139, 235, 331, 427, 75, 171, 267, 363, 459,
19, 115, 211, 307, 403, 51, 147, 243, 339, 435, 83, 179, 275, 371, 467,
27, 123, 219, 315, 411, 59, 155, 251, 347, 443, 91, 187, 283, 379, 475,
7, 103, 199, 295, 391, 39, 135, 231, 327, 423, 71, 167, 263, 359, 455,
15, 111, 207, 303, 399, 47, 143, 239, 335, 431, 79, 175, 271, 367, 463,
23, 119, 215, 311, 407, 55, 151, 247, 343, 439, 87, 183, 279, 375, 471,
31, 127, 223, 319, 415, 63, 159, 255, 351, 447, 95, 191, 287, 383, 479,
};
#endif
#ifndef FFT_BITREV240
#define FFT_BITREV240
static const opus_int16 fft_bitrev240[240] = {
0, 60, 120, 180, 15, 75, 135, 195, 30, 90, 150, 210, 45, 105, 165,
225, 5, 65, 125, 185, 20, 80, 140, 200, 35, 95, 155, 215, 50, 110,
170, 230, 10, 70, 130, 190, 25, 85, 145, 205, 40, 100, 160, 220, 55,
115, 175, 235, 1, 61, 121, 181, 16, 76, 136, 196, 31, 91, 151, 211,
46, 106, 166, 226, 6, 66, 126, 186, 21, 81, 141, 201, 36, 96, 156,
216, 51, 111, 171, 231, 11, 71, 131, 191, 26, 86, 146, 206, 41, 101,
161, 221, 56, 116, 176, 236, 2, 62, 122, 182, 17, 77, 137, 197, 32,
92, 152, 212, 47, 107, 167, 227, 7, 67, 127, 187, 22, 82, 142, 202,
37, 97, 157, 217, 52, 112, 172, 232, 12, 72, 132, 192, 27, 87, 147,
207, 42, 102, 162, 222, 57, 117, 177, 237, 3, 63, 123, 183, 18, 78,
138, 198, 33, 93, 153, 213, 48, 108, 168, 228, 8, 68, 128, 188, 23,
83, 143, 203, 38, 98, 158, 218, 53, 113, 173, 233, 13, 73, 133, 193,
28, 88, 148, 208, 43, 103, 163, 223, 58, 118, 178, 238, 4, 64, 124,
184, 19, 79, 139, 199, 34, 94, 154, 214, 49, 109, 169, 229, 9, 69,
129, 189, 24, 84, 144, 204, 39, 99, 159, 219, 54, 114, 174, 234, 14,
74, 134, 194, 29, 89, 149, 209, 44, 104, 164, 224, 59, 119, 179, 239,
0, 48, 96, 144, 192, 16, 64, 112, 160, 208, 32, 80, 128, 176, 224,
4, 52, 100, 148, 196, 20, 68, 116, 164, 212, 36, 84, 132, 180, 228,
8, 56, 104, 152, 200, 24, 72, 120, 168, 216, 40, 88, 136, 184, 232,
12, 60, 108, 156, 204, 28, 76, 124, 172, 220, 44, 92, 140, 188, 236,
1, 49, 97, 145, 193, 17, 65, 113, 161, 209, 33, 81, 129, 177, 225,
5, 53, 101, 149, 197, 21, 69, 117, 165, 213, 37, 85, 133, 181, 229,
9, 57, 105, 153, 201, 25, 73, 121, 169, 217, 41, 89, 137, 185, 233,
13, 61, 109, 157, 205, 29, 77, 125, 173, 221, 45, 93, 141, 189, 237,
2, 50, 98, 146, 194, 18, 66, 114, 162, 210, 34, 82, 130, 178, 226,
6, 54, 102, 150, 198, 22, 70, 118, 166, 214, 38, 86, 134, 182, 230,
10, 58, 106, 154, 202, 26, 74, 122, 170, 218, 42, 90, 138, 186, 234,
14, 62, 110, 158, 206, 30, 78, 126, 174, 222, 46, 94, 142, 190, 238,
3, 51, 99, 147, 195, 19, 67, 115, 163, 211, 35, 83, 131, 179, 227,
7, 55, 103, 151, 199, 23, 71, 119, 167, 215, 39, 87, 135, 183, 231,
11, 59, 107, 155, 203, 27, 75, 123, 171, 219, 43, 91, 139, 187, 235,
15, 63, 111, 159, 207, 31, 79, 127, 175, 223, 47, 95, 143, 191, 239,
};
#endif
#ifndef FFT_BITREV120
#define FFT_BITREV120
static const opus_int16 fft_bitrev120[120] = {
0, 30, 60, 90, 15, 45, 75, 105, 5, 35, 65, 95, 20, 50, 80,
110, 10, 40, 70, 100, 25, 55, 85, 115, 1, 31, 61, 91, 16, 46,
76, 106, 6, 36, 66, 96, 21, 51, 81, 111, 11, 41, 71, 101, 26,
56, 86, 116, 2, 32, 62, 92, 17, 47, 77, 107, 7, 37, 67, 97,
22, 52, 82, 112, 12, 42, 72, 102, 27, 57, 87, 117, 3, 33, 63,
93, 18, 48, 78, 108, 8, 38, 68, 98, 23, 53, 83, 113, 13, 43,
73, 103, 28, 58, 88, 118, 4, 34, 64, 94, 19, 49, 79, 109, 9,
39, 69, 99, 24, 54, 84, 114, 14, 44, 74, 104, 29, 59, 89, 119,
0, 24, 48, 72, 96, 8, 32, 56, 80, 104, 16, 40, 64, 88, 112,
4, 28, 52, 76, 100, 12, 36, 60, 84, 108, 20, 44, 68, 92, 116,
1, 25, 49, 73, 97, 9, 33, 57, 81, 105, 17, 41, 65, 89, 113,
5, 29, 53, 77, 101, 13, 37, 61, 85, 109, 21, 45, 69, 93, 117,
2, 26, 50, 74, 98, 10, 34, 58, 82, 106, 18, 42, 66, 90, 114,
6, 30, 54, 78, 102, 14, 38, 62, 86, 110, 22, 46, 70, 94, 118,
3, 27, 51, 75, 99, 11, 35, 59, 83, 107, 19, 43, 67, 91, 115,
7, 31, 55, 79, 103, 15, 39, 63, 87, 111, 23, 47, 71, 95, 119,
};
#endif
#ifndef FFT_BITREV60
#define FFT_BITREV60
static const opus_int16 fft_bitrev60[60] = {
0, 15, 30, 45, 5, 20, 35, 50, 10, 25, 40, 55, 1, 16, 31,
46, 6, 21, 36, 51, 11, 26, 41, 56, 2, 17, 32, 47, 7, 22,
37, 52, 12, 27, 42, 57, 3, 18, 33, 48, 8, 23, 38, 53, 13,
28, 43, 58, 4, 19, 34, 49, 9, 24, 39, 54, 14, 29, 44, 59,
0, 12, 24, 36, 48, 4, 16, 28, 40, 52, 8, 20, 32, 44, 56,
1, 13, 25, 37, 49, 5, 17, 29, 41, 53, 9, 21, 33, 45, 57,
2, 14, 26, 38, 50, 6, 18, 30, 42, 54, 10, 22, 34, 46, 58,
3, 15, 27, 39, 51, 7, 19, 31, 43, 55, 11, 23, 35, 47, 59,
};
#endif
@ -428,9 +433,14 @@ static const kiss_fft_state fft_state48000_960_0 = {
480, /* nfft */
0.002083333f, /* scale */
-1, /* shift */
{4, 120, 4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, }, /* factors */
{5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, }, /* factors */
fft_bitrev480, /* bitrev */
fft_twiddles48000_960, /* bitrev */
#ifdef OVERRIDE_FFT
(arch_fft_state *)&cfg_arch_480,
#else
NULL,
#endif
};
#endif
@ -440,9 +450,14 @@ static const kiss_fft_state fft_state48000_960_1 = {
240, /* nfft */
0.004166667f, /* scale */
1, /* shift */
{4, 60, 4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
{5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
fft_bitrev240, /* bitrev */
fft_twiddles48000_960, /* bitrev */
#ifdef OVERRIDE_FFT
(arch_fft_state *)&cfg_arch_240,
#else
NULL,
#endif
};
#endif
@ -452,9 +467,14 @@ static const kiss_fft_state fft_state48000_960_2 = {
120, /* nfft */
0.008333333f, /* scale */
2, /* shift */
{4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
{5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
fft_bitrev120, /* bitrev */
fft_twiddles48000_960, /* bitrev */
#ifdef OVERRIDE_FFT
(arch_fft_state *)&cfg_arch_120,
#else
NULL,
#endif
};
#endif
@ -464,9 +484,14 @@ static const kiss_fft_state fft_state48000_960_3 = {
60, /* nfft */
0.016666667f, /* scale */
3, /* shift */
{4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
{5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
fft_bitrev60, /* bitrev */
fft_twiddles48000_960, /* bitrev */
#ifdef OVERRIDE_FFT
(arch_fft_state *)&cfg_arch_60,
#else
NULL,
#endif
};
#endif
@ -474,104 +499,368 @@ fft_twiddles48000_960, /* bitrev */
#ifndef MDCT_TWIDDLES960
#define MDCT_TWIDDLES960
static const opus_val16 mdct_twiddles960[481] = {
1.0000000f, 0.99999465f, 0.99997858f, 0.99995181f, 0.99991433f,
0.99986614f, 0.99980724f, 0.99973764f, 0.99965732f, 0.99956631f,
0.99946459f, 0.99935216f, 0.99922904f, 0.99909521f, 0.99895068f,
0.99879546f, 0.99862953f, 0.99845292f, 0.99826561f, 0.99806761f,
0.99785892f, 0.99763955f, 0.99740949f, 0.99716875f, 0.99691733f,
0.99665524f, 0.99638247f, 0.99609903f, 0.99580493f, 0.99550016f,
0.99518473f, 0.99485864f, 0.99452190f, 0.99417450f, 0.99381646f,
0.99344778f, 0.99306846f, 0.99267850f, 0.99227791f, 0.99186670f,
0.99144486f, 0.99101241f, 0.99056934f, 0.99011566f, 0.98965139f,
0.98917651f, 0.98869104f, 0.98819498f, 0.98768834f, 0.98717112f,
0.98664333f, 0.98610497f, 0.98555606f, 0.98499659f, 0.98442657f,
0.98384600f, 0.98325491f, 0.98265328f, 0.98204113f, 0.98141846f,
0.98078528f, 0.98014159f, 0.97948742f, 0.97882275f, 0.97814760f,
0.97746197f, 0.97676588f, 0.97605933f, 0.97534232f, 0.97461487f,
0.97387698f, 0.97312866f, 0.97236992f, 0.97160077f, 0.97082121f,
0.97003125f, 0.96923091f, 0.96842019f, 0.96759909f, 0.96676764f,
0.96592582f, 0.96507367f, 0.96421118f, 0.96333837f, 0.96245523f,
0.96156180f, 0.96065806f, 0.95974403f, 0.95881973f, 0.95788517f,
0.95694034f, 0.95598526f, 0.95501995f, 0.95404440f, 0.95305864f,
0.95206267f, 0.95105651f, 0.95004016f, 0.94901364f, 0.94797697f,
0.94693013f, 0.94587315f, 0.94480604f, 0.94372882f, 0.94264149f,
0.94154406f, 0.94043656f, 0.93931897f, 0.93819133f, 0.93705365f,
0.93590592f, 0.93474818f, 0.93358042f, 0.93240268f, 0.93121493f,
0.93001722f, 0.92880955f, 0.92759193f, 0.92636438f, 0.92512690f,
0.92387953f, 0.92262225f, 0.92135509f, 0.92007809f, 0.91879121f,
0.91749449f, 0.91618795f, 0.91487161f, 0.91354545f, 0.91220952f,
0.91086382f, 0.90950836f, 0.90814316f, 0.90676824f, 0.90538363f,
0.90398929f, 0.90258528f, 0.90117161f, 0.89974828f, 0.89831532f,
0.89687273f, 0.89542055f, 0.89395877f, 0.89248742f, 0.89100652f,
0.88951606f, 0.88801610f, 0.88650661f, 0.88498764f, 0.88345918f,
0.88192125f, 0.88037390f, 0.87881711f, 0.87725090f, 0.87567531f,
0.87409035f, 0.87249599f, 0.87089232f, 0.86927933f, 0.86765699f,
0.86602540f, 0.86438453f, 0.86273437f, 0.86107503f, 0.85940641f,
0.85772862f, 0.85604161f, 0.85434547f, 0.85264014f, 0.85092572f,
0.84920218f, 0.84746955f, 0.84572781f, 0.84397704f, 0.84221721f,
0.84044838f, 0.83867056f, 0.83688375f, 0.83508799f, 0.83328325f,
0.83146961f, 0.82964704f, 0.82781562f, 0.82597530f, 0.82412620f,
0.82226820f, 0.82040144f, 0.81852589f, 0.81664154f, 0.81474847f,
0.81284665f, 0.81093620f, 0.80901698f, 0.80708914f, 0.80515262f,
0.80320752f, 0.80125378f, 0.79929149f, 0.79732067f, 0.79534125f,
0.79335335f, 0.79135691f, 0.78935204f, 0.78733867f, 0.78531691f,
0.78328674f, 0.78124818f, 0.77920122f, 0.77714595f, 0.77508232f,
0.77301043f, 0.77093026f, 0.76884183f, 0.76674517f, 0.76464026f,
0.76252720f, 0.76040593f, 0.75827656f, 0.75613907f, 0.75399349f,
0.75183978f, 0.74967807f, 0.74750833f, 0.74533054f, 0.74314481f,
0.74095112f, 0.73874950f, 0.73653993f, 0.73432251f, 0.73209718f,
0.72986405f, 0.72762307f, 0.72537438f, 0.72311787f, 0.72085359f,
0.71858162f, 0.71630192f, 0.71401459f, 0.71171956f, 0.70941701f,
0.70710677f, 0.70478900f, 0.70246363f, 0.70013079f, 0.69779041f,
0.69544260f, 0.69308738f, 0.69072466f, 0.68835458f, 0.68597709f,
0.68359229f, 0.68120013f, 0.67880072f, 0.67639404f, 0.67398011f,
0.67155892f, 0.66913059f, 0.66669509f, 0.66425240f, 0.66180265f,
0.65934581f, 0.65688191f, 0.65441092f, 0.65193298f, 0.64944801f,
0.64695613f, 0.64445727f, 0.64195160f, 0.63943902f, 0.63691954f,
0.63439328f, 0.63186019f, 0.62932037f, 0.62677377f, 0.62422055f,
0.62166055f, 0.61909394f, 0.61652065f, 0.61394081f, 0.61135435f,
0.60876139f, 0.60616195f, 0.60355593f, 0.60094349f, 0.59832457f,
0.59569929f, 0.59306758f, 0.59042957f, 0.58778523f, 0.58513460f,
0.58247766f, 0.57981452f, 0.57714518f, 0.57446961f, 0.57178793f,
0.56910013f, 0.56640624f, 0.56370623f, 0.56100023f, 0.55828818f,
0.55557020f, 0.55284627f, 0.55011641f, 0.54738067f, 0.54463901f,
0.54189157f, 0.53913828f, 0.53637921f, 0.53361450f, 0.53084398f,
0.52806787f, 0.52528601f, 0.52249852f, 0.51970543f, 0.51690688f,
0.51410279f, 0.51129310f, 0.50847793f, 0.50565732f, 0.50283139f,
0.49999997f, 0.49716321f, 0.49432122f, 0.49147383f, 0.48862118f,
0.48576340f, 0.48290042f, 0.48003216f, 0.47715876f, 0.47428025f,
0.47139677f, 0.46850813f, 0.46561448f, 0.46271584f, 0.45981235f,
0.45690383f, 0.45399042f, 0.45107214f, 0.44814915f, 0.44522124f,
0.44228868f, 0.43935137f, 0.43640926f, 0.43346247f, 0.43051104f,
0.42755511f, 0.42459449f, 0.42162932f, 0.41865964f, 0.41568558f,
0.41270697f, 0.40972393f, 0.40673661f, 0.40374494f, 0.40074884f,
0.39774844f, 0.39474390f, 0.39173501f, 0.38872193f, 0.38570469f,
0.38268343f, 0.37965796f, 0.37662842f, 0.37359496f, 0.37055739f,
0.36751585f, 0.36447038f, 0.36142122f, 0.35836797f, 0.35531089f,
0.35225000f, 0.34918544f, 0.34611704f, 0.34304493f, 0.33996926f,
0.33688983f, 0.33380680f, 0.33072019f, 0.32763015f, 0.32453650f,
0.32143936f, 0.31833890f, 0.31523503f, 0.31212767f, 0.30901696f,
0.30590306f, 0.30278577f, 0.29966524f, 0.29654150f, 0.29341470f,
0.29028464f, 0.28715147f, 0.28401522f, 0.28087605f, 0.27773376f,
0.27458861f, 0.27144052f, 0.26828940f, 0.26513541f, 0.26197859f,
0.25881907f, 0.25565666f, 0.25249152f, 0.24932367f, 0.24615327f,
0.24298012f, 0.23980436f, 0.23662604f, 0.23344530f, 0.23026206f,
0.22707623f, 0.22388809f, 0.22069744f, 0.21750443f, 0.21430908f,
0.21111156f, 0.20791165f, 0.20470953f, 0.20150520f, 0.19829884f,
0.19509024f, 0.19187955f, 0.18866692f, 0.18545227f, 0.18223552f,
0.17901681f, 0.17579631f, 0.17257380f, 0.16934945f, 0.16612328f,
0.16289546f, 0.15966577f, 0.15643437f, 0.15320141f, 0.14996669f,
0.14673037f, 0.14349260f, 0.14025329f, 0.13701235f, 0.13376995f,
0.13052612f, 0.12728101f, 0.12403442f, 0.12078650f, 0.11753740f,
0.11428693f, 0.11103523f, 0.10778234f, 0.10452842f, 0.10127326f,
0.098017137f, 0.094759842f, 0.091501652f, 0.088242363f, 0.084982129f,
0.081721103f, 0.078459084f, 0.075196224f, 0.071932560f, 0.068668243f,
0.065403073f, 0.062137201f, 0.058870665f, 0.055603617f, 0.052335974f,
0.049067651f, 0.045798921f, 0.042529582f, 0.039259788f, 0.035989573f,
0.032719092f, 0.029448142f, 0.026176876f, 0.022905329f, 0.019633657f,
0.016361655f, 0.013089478f, 0.0098171604f, 0.0065449764f, 0.0032724839f,
-4.3711390e-08f, };
static const opus_val16 mdct_twiddles960[1800] = {
0.99999994f, 0.99999321f, 0.99997580f, 0.99994773f, 0.99990886f,
0.99985933f, 0.99979913f, 0.99972820f, 0.99964654f, 0.99955416f,
0.99945110f, 0.99933738f, 0.99921292f, 0.99907774f, 0.99893188f,
0.99877530f, 0.99860805f, 0.99843007f, 0.99824142f, 0.99804211f,
0.99783206f, 0.99761140f, 0.99737996f, 0.99713790f, 0.99688518f,
0.99662173f, 0.99634761f, 0.99606287f, 0.99576741f, 0.99546129f,
0.99514455f, 0.99481714f, 0.99447906f, 0.99413031f, 0.99377096f,
0.99340093f, 0.99302030f, 0.99262899f, 0.99222708f, 0.99181455f,
0.99139136f, 0.99095762f, 0.99051321f, 0.99005818f, 0.98959261f,
0.98911643f, 0.98862964f, 0.98813224f, 0.98762429f, 0.98710573f,
0.98657662f, 0.98603696f, 0.98548669f, 0.98492593f, 0.98435456f,
0.98377270f, 0.98318028f, 0.98257732f, 0.98196387f, 0.98133987f,
0.98070538f, 0.98006040f, 0.97940493f, 0.97873890f, 0.97806245f,
0.97737551f, 0.97667813f, 0.97597027f, 0.97525197f, 0.97452319f,
0.97378403f, 0.97303438f, 0.97227436f, 0.97150391f, 0.97072303f,
0.96993178f, 0.96913016f, 0.96831810f, 0.96749574f, 0.96666300f,
0.96581990f, 0.96496642f, 0.96410263f, 0.96322852f, 0.96234411f,
0.96144938f, 0.96054435f, 0.95962906f, 0.95870346f, 0.95776761f,
0.95682150f, 0.95586514f, 0.95489854f, 0.95392174f, 0.95293468f,
0.95193744f, 0.95093000f, 0.94991243f, 0.94888461f, 0.94784665f,
0.94679856f, 0.94574034f, 0.94467193f, 0.94359344f, 0.94250488f,
0.94140619f, 0.94029742f, 0.93917859f, 0.93804967f, 0.93691075f,
0.93576175f, 0.93460274f, 0.93343377f, 0.93225473f, 0.93106574f,
0.92986679f, 0.92865789f, 0.92743903f, 0.92621022f, 0.92497152f,
0.92372292f, 0.92246443f, 0.92119598f, 0.91991776f, 0.91862965f,
0.91733170f, 0.91602397f, 0.91470635f, 0.91337901f, 0.91204184f,
0.91069490f, 0.90933824f, 0.90797186f, 0.90659571f, 0.90520984f,
0.90381432f, 0.90240908f, 0.90099424f, 0.89956969f, 0.89813554f,
0.89669174f, 0.89523834f, 0.89377540f, 0.89230281f, 0.89082074f,
0.88932908f, 0.88782793f, 0.88631725f, 0.88479710f, 0.88326746f,
0.88172835f, 0.88017982f, 0.87862182f, 0.87705445f, 0.87547767f,
0.87389153f, 0.87229604f, 0.87069118f, 0.86907703f, 0.86745358f,
0.86582077f, 0.86417878f, 0.86252749f, 0.86086690f, 0.85919720f,
0.85751826f, 0.85583007f, 0.85413277f, 0.85242635f, 0.85071075f,
0.84898609f, 0.84725231f, 0.84550947f, 0.84375757f, 0.84199661f,
0.84022665f, 0.83844769f, 0.83665979f, 0.83486289f, 0.83305705f,
0.83124226f, 0.82941860f, 0.82758605f, 0.82574469f, 0.82389444f,
0.82203537f, 0.82016748f, 0.81829083f, 0.81640542f, 0.81451124f,
0.81260836f, 0.81069672f, 0.80877650f, 0.80684757f, 0.80490994f,
0.80296379f, 0.80100900f, 0.79904562f, 0.79707366f, 0.79509324f,
0.79310423f, 0.79110676f, 0.78910083f, 0.78708643f, 0.78506362f,
0.78303236f, 0.78099275f, 0.77894479f, 0.77688843f, 0.77482378f,
0.77275085f, 0.77066964f, 0.76858020f, 0.76648247f, 0.76437658f,
0.76226246f, 0.76014024f, 0.75800985f, 0.75587130f, 0.75372469f,
0.75157005f, 0.74940729f, 0.74723655f, 0.74505776f, 0.74287105f,
0.74067634f, 0.73847371f, 0.73626316f, 0.73404479f, 0.73181850f,
0.72958434f, 0.72734243f, 0.72509271f, 0.72283524f, 0.72057003f,
0.71829706f, 0.71601641f, 0.71372813f, 0.71143216f, 0.70912862f,
0.70681745f, 0.70449871f, 0.70217246f, 0.69983864f, 0.69749737f,
0.69514859f, 0.69279242f, 0.69042879f, 0.68805778f, 0.68567938f,
0.68329364f, 0.68090063f, 0.67850029f, 0.67609268f, 0.67367786f,
0.67125577f, 0.66882652f, 0.66639012f, 0.66394657f, 0.66149592f,
0.65903819f, 0.65657341f, 0.65410155f, 0.65162271f, 0.64913690f,
0.64664418f, 0.64414448f, 0.64163786f, 0.63912445f, 0.63660413f,
0.63407701f, 0.63154310f, 0.62900239f, 0.62645501f, 0.62390089f,
0.62134010f, 0.61877263f, 0.61619854f, 0.61361790f, 0.61103064f,
0.60843682f, 0.60583651f, 0.60322970f, 0.60061646f, 0.59799677f,
0.59537065f, 0.59273821f, 0.59009939f, 0.58745426f, 0.58480281f,
0.58214509f, 0.57948118f, 0.57681108f, 0.57413477f, 0.57145232f,
0.56876373f, 0.56606907f, 0.56336832f, 0.56066155f, 0.55794877f,
0.55523002f, 0.55250537f, 0.54977477f, 0.54703826f, 0.54429591f,
0.54154772f, 0.53879374f, 0.53603399f, 0.53326851f, 0.53049731f,
0.52772039f, 0.52493787f, 0.52214974f, 0.51935595f, 0.51655668f,
0.51375180f, 0.51094145f, 0.50812566f, 0.50530440f, 0.50247771f,
0.49964568f, 0.49680826f, 0.49396557f, 0.49111754f, 0.48826426f,
0.48540577f, 0.48254207f, 0.47967321f, 0.47679919f, 0.47392011f,
0.47103590f, 0.46814668f, 0.46525243f, 0.46235323f, 0.45944905f,
0.45653993f, 0.45362595f, 0.45070711f, 0.44778344f, 0.44485497f,
0.44192174f, 0.43898380f, 0.43604112f, 0.43309379f, 0.43014181f,
0.42718524f, 0.42422408f, 0.42125839f, 0.41828820f, 0.41531351f,
0.41233435f, 0.40935081f, 0.40636289f, 0.40337059f, 0.40037400f,
0.39737311f, 0.39436796f, 0.39135858f, 0.38834500f, 0.38532731f,
0.38230544f, 0.37927949f, 0.37624949f, 0.37321547f, 0.37017745f,
0.36713544f, 0.36408952f, 0.36103970f, 0.35798600f, 0.35492846f,
0.35186714f, 0.34880206f, 0.34573323f, 0.34266070f, 0.33958447f,
0.33650464f, 0.33342120f, 0.33033419f, 0.32724363f, 0.32414958f,
0.32105204f, 0.31795108f, 0.31484672f, 0.31173897f, 0.30862790f,
0.30551350f, 0.30239585f, 0.29927495f, 0.29615086f, 0.29302359f,
0.28989318f, 0.28675964f, 0.28362307f, 0.28048345f, 0.27734083f,
0.27419522f, 0.27104670f, 0.26789525f, 0.26474094f, 0.26158381f,
0.25842386f, 0.25526115f, 0.25209570f, 0.24892756f, 0.24575676f,
0.24258332f, 0.23940729f, 0.23622867f, 0.23304754f, 0.22986393f,
0.22667783f, 0.22348931f, 0.22029841f, 0.21710514f, 0.21390954f,
0.21071166f, 0.20751151f, 0.20430915f, 0.20110460f, 0.19789790f,
0.19468907f, 0.19147816f, 0.18826519f, 0.18505022f, 0.18183327f,
0.17861435f, 0.17539354f, 0.17217083f, 0.16894630f, 0.16571994f,
0.16249183f, 0.15926196f, 0.15603039f, 0.15279715f, 0.14956227f,
0.14632578f, 0.14308774f, 0.13984816f, 0.13660708f, 0.13336454f,
0.13012058f, 0.12687522f, 0.12362850f, 0.12038045f, 0.11713112f,
0.11388054f, 0.11062872f, 0.10737573f, 0.10412160f, 0.10086634f,
0.097609997f, 0.094352618f, 0.091094226f, 0.087834857f, 0.084574550f,
0.081313334f, 0.078051247f, 0.074788325f, 0.071524605f, 0.068260118f,
0.064994894f, 0.061728980f, 0.058462404f, 0.055195201f, 0.051927410f,
0.048659060f, 0.045390189f, 0.042120833f, 0.038851023f, 0.035580799f,
0.032310195f, 0.029039243f, 0.025767982f, 0.022496443f, 0.019224664f,
0.015952680f, 0.012680525f, 0.0094082337f, 0.0061358409f, 0.0028633832f,
-0.00040910527f, -0.0036815894f, -0.0069540343f, -0.010226404f, -0.013498665f,
-0.016770782f, -0.020042717f, -0.023314439f, -0.026585912f, -0.029857099f,
-0.033127967f, -0.036398482f, -0.039668605f, -0.042938303f, -0.046207540f,
-0.049476285f, -0.052744497f, -0.056012146f, -0.059279196f, -0.062545612f,
-0.065811358f, -0.069076397f, -0.072340697f, -0.075604223f, -0.078866936f,
-0.082128808f, -0.085389800f, -0.088649876f, -0.091909006f, -0.095167145f,
-0.098424271f, -0.10168034f, -0.10493532f, -0.10818918f, -0.11144188f,
-0.11469338f, -0.11794366f, -0.12119267f, -0.12444039f, -0.12768677f,
-0.13093179f, -0.13417540f, -0.13741758f, -0.14065829f, -0.14389749f,
-0.14713514f, -0.15037122f, -0.15360570f, -0.15683852f, -0.16006967f,
-0.16329910f, -0.16652679f, -0.16975269f, -0.17297678f, -0.17619900f,
-0.17941935f, -0.18263777f, -0.18585424f, -0.18906870f, -0.19228116f,
-0.19549155f, -0.19869985f, -0.20190603f, -0.20511003f, -0.20831184f,
-0.21151142f, -0.21470875f, -0.21790376f, -0.22109644f, -0.22428675f,
-0.22747467f, -0.23066014f, -0.23384315f, -0.23702365f, -0.24020162f,
-0.24337701f, -0.24654980f, -0.24971995f, -0.25288740f, -0.25605217f,
-0.25921419f, -0.26237345f, -0.26552987f, -0.26868346f, -0.27183419f,
-0.27498198f, -0.27812684f, -0.28126872f, -0.28440759f, -0.28754342f,
-0.29067615f, -0.29380578f, -0.29693225f, -0.30005556f, -0.30317566f,
-0.30629250f, -0.30940607f, -0.31251630f, -0.31562322f, -0.31872672f,
-0.32182685f, -0.32492352f, -0.32801670f, -0.33110636f, -0.33419248f,
-0.33727503f, -0.34035397f, -0.34342924f, -0.34650084f, -0.34956875f,
-0.35263291f, -0.35569328f, -0.35874987f, -0.36180258f, -0.36485144f,
-0.36789638f, -0.37093741f, -0.37397444f, -0.37700745f, -0.38003644f,
-0.38306138f, -0.38608220f, -0.38909888f, -0.39211139f, -0.39511973f,
-0.39812380f, -0.40112361f, -0.40411916f, -0.40711036f, -0.41009718f,
-0.41307965f, -0.41605768f, -0.41903123f, -0.42200032f, -0.42496487f,
-0.42792490f, -0.43088034f, -0.43383113f, -0.43677729f, -0.43971881f,
-0.44265559f, -0.44558764f, -0.44851488f, -0.45143735f, -0.45435500f,
-0.45726776f, -0.46017563f, -0.46307856f, -0.46597654f, -0.46886954f,
-0.47175750f, -0.47464043f, -0.47751826f, -0.48039100f, -0.48325855f,
-0.48612097f, -0.48897815f, -0.49183011f, -0.49467680f, -0.49751821f,
-0.50035429f, -0.50318497f, -0.50601029f, -0.50883019f, -0.51164466f,
-0.51445359f, -0.51725709f, -0.52005500f, -0.52284735f, -0.52563411f,
-0.52841520f, -0.53119069f, -0.53396046f, -0.53672451f, -0.53948283f,
-0.54223537f, -0.54498214f, -0.54772300f, -0.55045801f, -0.55318713f,
-0.55591035f, -0.55862761f, -0.56133890f, -0.56404412f, -0.56674337f,
-0.56943649f, -0.57212353f, -0.57480448f, -0.57747924f, -0.58014780f,
-0.58281022f, -0.58546633f, -0.58811617f, -0.59075975f, -0.59339696f,
-0.59602785f, -0.59865236f, -0.60127044f, -0.60388207f, -0.60648727f,
-0.60908598f, -0.61167812f, -0.61426371f, -0.61684275f, -0.61941516f,
-0.62198097f, -0.62454009f, -0.62709254f, -0.62963831f, -0.63217729f,
-0.63470948f, -0.63723493f, -0.63975352f, -0.64226526f, -0.64477009f,
-0.64726806f, -0.64975911f, -0.65224314f, -0.65472025f, -0.65719032f,
-0.65965337f, -0.66210932f, -0.66455823f, -0.66700000f, -0.66943461f,
-0.67186207f, -0.67428231f, -0.67669535f, -0.67910111f, -0.68149966f,
-0.68389088f, -0.68627477f, -0.68865126f, -0.69102043f, -0.69338220f,
-0.69573659f, -0.69808346f, -0.70042288f, -0.70275480f, -0.70507920f,
-0.70739603f, -0.70970529f, -0.71200693f, -0.71430099f, -0.71658736f,
-0.71886611f, -0.72113711f, -0.72340041f, -0.72565591f, -0.72790372f,
-0.73014367f, -0.73237586f, -0.73460019f, -0.73681659f, -0.73902518f,
-0.74122584f, -0.74341851f, -0.74560326f, -0.74778003f, -0.74994880f,
-0.75210953f, -0.75426215f, -0.75640678f, -0.75854325f, -0.76067162f,
-0.76279181f, -0.76490390f, -0.76700771f, -0.76910341f, -0.77119076f,
-0.77326995f, -0.77534080f, -0.77740335f, -0.77945763f, -0.78150350f,
-0.78354102f, -0.78557014f, -0.78759086f, -0.78960317f, -0.79160696f,
-0.79360235f, -0.79558921f, -0.79756755f, -0.79953730f, -0.80149853f,
-0.80345118f, -0.80539525f, -0.80733067f, -0.80925739f, -0.81117553f,
-0.81308490f, -0.81498563f, -0.81687760f, -0.81876087f, -0.82063532f,
-0.82250100f, -0.82435787f, -0.82620591f, -0.82804507f, -0.82987541f,
-0.83169687f, -0.83350939f, -0.83531296f, -0.83710766f, -0.83889335f,
-0.84067005f, -0.84243774f, -0.84419644f, -0.84594607f, -0.84768665f,
-0.84941816f, -0.85114056f, -0.85285389f, -0.85455805f, -0.85625303f,
-0.85793889f, -0.85961550f, -0.86128294f, -0.86294121f, -0.86459017f,
-0.86622989f, -0.86786032f, -0.86948150f, -0.87109333f, -0.87269586f,
-0.87428904f, -0.87587279f, -0.87744725f, -0.87901229f, -0.88056785f,
-0.88211405f, -0.88365078f, -0.88517809f, -0.88669586f, -0.88820416f,
-0.88970292f, -0.89119220f, -0.89267188f, -0.89414203f, -0.89560264f,
-0.89705360f, -0.89849502f, -0.89992678f, -0.90134889f, -0.90276134f,
-0.90416414f, -0.90555727f, -0.90694070f, -0.90831441f, -0.90967834f,
-0.91103262f, -0.91237706f, -0.91371179f, -0.91503674f, -0.91635185f,
-0.91765714f, -0.91895264f, -0.92023826f, -0.92151409f, -0.92277998f,
-0.92403603f, -0.92528218f, -0.92651838f, -0.92774469f, -0.92896110f,
-0.93016750f, -0.93136400f, -0.93255049f, -0.93372697f, -0.93489349f,
-0.93604994f, -0.93719643f, -0.93833286f, -0.93945926f, -0.94057560f,
-0.94168180f, -0.94277799f, -0.94386405f, -0.94494003f, -0.94600588f,
-0.94706154f, -0.94810712f, -0.94914252f, -0.95016778f, -0.95118284f,
-0.95218778f, -0.95318246f, -0.95416695f, -0.95514119f, -0.95610523f,
-0.95705903f, -0.95800257f, -0.95893586f, -0.95985889f, -0.96077162f,
-0.96167403f, -0.96256620f, -0.96344805f, -0.96431959f, -0.96518075f,
-0.96603161f, -0.96687216f, -0.96770233f, -0.96852213f, -0.96933156f,
-0.97013056f, -0.97091925f, -0.97169751f, -0.97246534f, -0.97322279f,
-0.97396982f, -0.97470641f, -0.97543252f, -0.97614825f, -0.97685349f,
-0.97754824f, -0.97823256f, -0.97890645f, -0.97956979f, -0.98022264f,
-0.98086500f, -0.98149687f, -0.98211825f, -0.98272908f, -0.98332942f,
-0.98391914f, -0.98449844f, -0.98506713f, -0.98562527f, -0.98617285f,
-0.98670989f, -0.98723638f, -0.98775226f, -0.98825759f, -0.98875231f,
-0.98923647f, -0.98971003f, -0.99017298f, -0.99062532f, -0.99106705f,
-0.99149817f, -0.99191868f, -0.99232858f, -0.99272782f, -0.99311644f,
-0.99349445f, -0.99386179f, -0.99421853f, -0.99456459f, -0.99489999f,
-0.99522477f, -0.99553883f, -0.99584228f, -0.99613506f, -0.99641716f,
-0.99668860f, -0.99694937f, -0.99719942f, -0.99743885f, -0.99766755f,
-0.99788558f, -0.99809295f, -0.99828959f, -0.99847561f, -0.99865085f,
-0.99881548f, -0.99896932f, -0.99911255f, -0.99924499f, -0.99936682f,
-0.99947786f, -0.99957830f, -0.99966794f, -0.99974692f, -0.99981517f,
-0.99987274f, -0.99991959f, -0.99995571f, -0.99998116f, -0.99999589f,
0.99999964f, 0.99997288f, 0.99990326f, 0.99979085f, 0.99963558f,
0.99943751f, 0.99919659f, 0.99891287f, 0.99858636f, 0.99821711f,
0.99780506f, 0.99735034f, 0.99685282f, 0.99631262f, 0.99572974f,
0.99510419f, 0.99443603f, 0.99372530f, 0.99297196f, 0.99217612f,
0.99133772f, 0.99045694f, 0.98953366f, 0.98856801f, 0.98756003f,
0.98650974f, 0.98541719f, 0.98428243f, 0.98310548f, 0.98188645f,
0.98062533f, 0.97932225f, 0.97797716f, 0.97659022f, 0.97516143f,
0.97369087f, 0.97217858f, 0.97062469f, 0.96902919f, 0.96739221f,
0.96571374f, 0.96399397f, 0.96223283f, 0.96043050f, 0.95858705f,
0.95670253f, 0.95477700f, 0.95281059f, 0.95080340f, 0.94875544f,
0.94666684f, 0.94453770f, 0.94236809f, 0.94015813f, 0.93790787f,
0.93561745f, 0.93328691f, 0.93091643f, 0.92850608f, 0.92605597f,
0.92356616f, 0.92103678f, 0.91846794f, 0.91585976f, 0.91321236f,
0.91052586f, 0.90780038f, 0.90503591f, 0.90223277f, 0.89939094f,
0.89651060f, 0.89359182f, 0.89063478f, 0.88763964f, 0.88460642f,
0.88153529f, 0.87842643f, 0.87527996f, 0.87209594f, 0.86887461f,
0.86561602f, 0.86232042f, 0.85898781f, 0.85561842f, 0.85221243f,
0.84876984f, 0.84529096f, 0.84177583f, 0.83822471f, 0.83463764f,
0.83101481f, 0.82735640f, 0.82366252f, 0.81993335f, 0.81616908f,
0.81236988f, 0.80853581f, 0.80466717f, 0.80076402f, 0.79682660f,
0.79285502f, 0.78884947f, 0.78481019f, 0.78073722f, 0.77663082f,
0.77249116f, 0.76831841f, 0.76411277f, 0.75987434f, 0.75560343f,
0.75130010f, 0.74696463f, 0.74259710f, 0.73819780f, 0.73376691f,
0.72930455f, 0.72481096f, 0.72028631f, 0.71573079f, 0.71114463f,
0.70652801f, 0.70188117f, 0.69720417f, 0.69249737f, 0.68776089f,
0.68299496f, 0.67819971f, 0.67337549f, 0.66852236f, 0.66364062f,
0.65873051f, 0.65379208f, 0.64882571f, 0.64383155f, 0.63880974f,
0.63376063f, 0.62868434f, 0.62358117f, 0.61845124f, 0.61329484f,
0.60811216f, 0.60290343f, 0.59766883f, 0.59240872f, 0.58712316f,
0.58181250f, 0.57647687f, 0.57111657f, 0.56573176f, 0.56032276f,
0.55488980f, 0.54943299f, 0.54395270f, 0.53844911f, 0.53292239f,
0.52737290f, 0.52180082f, 0.51620632f, 0.51058978f, 0.50495136f,
0.49929130f, 0.49360985f, 0.48790723f, 0.48218375f, 0.47643960f,
0.47067502f, 0.46489030f, 0.45908567f, 0.45326138f, 0.44741765f,
0.44155475f, 0.43567297f, 0.42977250f, 0.42385364f, 0.41791660f,
0.41196167f, 0.40598908f, 0.39999911f, 0.39399201f, 0.38796803f,
0.38192743f, 0.37587047f, 0.36979741f, 0.36370850f, 0.35760403f,
0.35148421f, 0.34534934f, 0.33919969f, 0.33303553f, 0.32685706f,
0.32066461f, 0.31445843f, 0.30823877f, 0.30200592f, 0.29576012f,
0.28950164f, 0.28323078f, 0.27694780f, 0.27065292f, 0.26434645f,
0.25802869f, 0.25169984f, 0.24536023f, 0.23901010f, 0.23264973f,
0.22627939f, 0.21989937f, 0.21350993f, 0.20711134f, 0.20070387f,
0.19428782f, 0.18786344f, 0.18143101f, 0.17499080f, 0.16854310f,
0.16208819f, 0.15562633f, 0.14915779f, 0.14268288f, 0.13620184f,
0.12971498f, 0.12322257f, 0.11672486f, 0.11022217f, 0.10371475f,
0.097202882f, 0.090686858f, 0.084166944f, 0.077643424f, 0.071116582f,
0.064586692f, 0.058054037f, 0.051518895f, 0.044981543f, 0.038442269f,
0.031901345f, 0.025359053f, 0.018815678f, 0.012271495f, 0.0057267868f,
-0.00081816671f, -0.0073630852f, -0.013907688f, -0.020451695f, -0.026994826f,
-0.033536803f, -0.040077340f, -0.046616159f, -0.053152986f, -0.059687532f,
-0.066219524f, -0.072748676f, -0.079274714f, -0.085797355f, -0.092316322f,
-0.098831341f, -0.10534211f, -0.11184838f, -0.11834986f, -0.12484626f,
-0.13133731f, -0.13782275f, -0.14430228f, -0.15077563f, -0.15724251f,
-0.16370267f, -0.17015581f, -0.17660165f, -0.18303993f, -0.18947038f,
-0.19589271f, -0.20230664f, -0.20871192f, -0.21510825f, -0.22149536f,
-0.22787298f, -0.23424086f, -0.24059868f, -0.24694622f, -0.25328314f,
-0.25960925f, -0.26592422f, -0.27222782f, -0.27851975f, -0.28479972f,
-0.29106751f, -0.29732284f, -0.30356544f, -0.30979502f, -0.31601134f,
-0.32221413f, -0.32840309f, -0.33457801f, -0.34073856f, -0.34688455f,
-0.35301566f, -0.35913166f, -0.36523229f, -0.37131724f, -0.37738630f,
-0.38343921f, -0.38947567f, -0.39549544f, -0.40149832f, -0.40748394f,
-0.41345215f, -0.41940263f, -0.42533514f, -0.43124944f, -0.43714526f,
-0.44302234f, -0.44888046f, -0.45471936f, -0.46053877f, -0.46633846f,
-0.47211814f, -0.47787762f, -0.48361665f, -0.48933494f, -0.49503228f,
-0.50070840f, -0.50636309f, -0.51199609f, -0.51760709f, -0.52319598f,
-0.52876246f, -0.53430629f, -0.53982723f, -0.54532504f, -0.55079949f,
-0.55625033f, -0.56167740f, -0.56708032f, -0.57245898f, -0.57781315f,
-0.58314258f, -0.58844697f, -0.59372622f, -0.59897995f, -0.60420811f,
-0.60941035f, -0.61458647f, -0.61973625f, -0.62485951f, -0.62995601f,
-0.63502556f, -0.64006782f, -0.64508271f, -0.65007001f, -0.65502942f,
-0.65996075f, -0.66486382f, -0.66973841f, -0.67458433f, -0.67940134f,
-0.68418926f, -0.68894786f, -0.69367695f, -0.69837630f, -0.70304573f,
-0.70768511f, -0.71229410f, -0.71687263f, -0.72142041f, -0.72593731f,
-0.73042315f, -0.73487765f, -0.73930067f, -0.74369204f, -0.74805158f,
-0.75237900f, -0.75667429f, -0.76093709f, -0.76516730f, -0.76936477f,
-0.77352923f, -0.77766061f, -0.78175867f, -0.78582323f, -0.78985411f,
-0.79385114f, -0.79781419f, -0.80174309f, -0.80563760f, -0.80949765f,
-0.81332302f, -0.81711352f, -0.82086903f, -0.82458937f, -0.82827437f,
-0.83192390f, -0.83553779f, -0.83911592f, -0.84265804f, -0.84616417f,
-0.84963393f, -0.85306740f, -0.85646427f, -0.85982448f, -0.86314780f,
-0.86643422f, -0.86968350f, -0.87289548f, -0.87607014f, -0.87920725f,
-0.88230664f, -0.88536829f, -0.88839203f, -0.89137769f, -0.89432514f,
-0.89723432f, -0.90010506f, -0.90293723f, -0.90573072f, -0.90848541f,
-0.91120118f, -0.91387796f, -0.91651553f, -0.91911387f, -0.92167282f,
-0.92419231f, -0.92667222f, -0.92911243f, -0.93151283f, -0.93387336f,
-0.93619382f, -0.93847424f, -0.94071442f, -0.94291431f, -0.94507378f,
-0.94719279f, -0.94927126f, -0.95130903f, -0.95330608f, -0.95526224f,
-0.95717752f, -0.95905179f, -0.96088499f, -0.96267700f, -0.96442777f,
-0.96613729f, -0.96780539f, -0.96943200f, -0.97101706f, -0.97256058f,
-0.97406244f, -0.97552258f, -0.97694093f, -0.97831738f, -0.97965199f,
-0.98094457f, -0.98219514f, -0.98340368f, -0.98457009f, -0.98569429f,
-0.98677629f, -0.98781598f, -0.98881340f, -0.98976845f, -0.99068111f,
-0.99155134f, -0.99237907f, -0.99316430f, -0.99390697f, -0.99460709f,
-0.99526459f, -0.99587947f, -0.99645168f, -0.99698120f, -0.99746799f,
-0.99791211f, -0.99831343f, -0.99867201f, -0.99898779f, -0.99926084f,
-0.99949104f, -0.99967843f, -0.99982297f, -0.99992472f, -0.99998361f,
0.99999869f, 0.99989158f, 0.99961317f, 0.99916345f, 0.99854255f,
0.99775058f, 0.99678761f, 0.99565387f, 0.99434954f, 0.99287480f,
0.99122995f, 0.98941529f, 0.98743105f, 0.98527765f, 0.98295540f,
0.98046476f, 0.97780609f, 0.97497988f, 0.97198665f, 0.96882683f,
0.96550101f, 0.96200979f, 0.95835376f, 0.95453346f, 0.95054960f,
0.94640291f, 0.94209403f, 0.93762374f, 0.93299282f, 0.92820197f,
0.92325211f, 0.91814411f, 0.91287869f, 0.90745693f, 0.90187967f,
0.89614785f, 0.89026248f, 0.88422459f, 0.87803519f, 0.87169534f,
0.86520612f, 0.85856867f, 0.85178405f, 0.84485358f, 0.83777827f,
0.83055943f, 0.82319832f, 0.81569612f, 0.80805415f, 0.80027372f,
0.79235619f, 0.78430289f, 0.77611518f, 0.76779449f, 0.75934225f,
0.75075996f, 0.74204898f, 0.73321080f, 0.72424710f, 0.71515924f,
0.70594883f, 0.69661748f, 0.68716675f, 0.67759830f, 0.66791373f,
0.65811473f, 0.64820296f, 0.63818014f, 0.62804794f, 0.61780810f,
0.60746247f, 0.59701276f, 0.58646071f, 0.57580817f, 0.56505698f,
0.55420899f, 0.54326600f, 0.53222996f, 0.52110273f, 0.50988621f,
0.49858227f, 0.48719296f, 0.47572014f, 0.46416581f, 0.45253196f,
0.44082057f, 0.42903364f, 0.41717321f, 0.40524128f, 0.39323992f,
0.38117120f, 0.36903715f, 0.35683987f, 0.34458145f, 0.33226398f,
0.31988961f, 0.30746040f, 0.29497850f, 0.28244606f, 0.26986524f,
0.25723818f, 0.24456702f, 0.23185398f, 0.21910121f, 0.20631088f,
0.19348522f, 0.18062639f, 0.16773662f, 0.15481812f, 0.14187308f,
0.12890373f, 0.11591230f, 0.10290100f, 0.089872077f, 0.076827750f,
0.063770257f, 0.050701842f, 0.037624735f, 0.024541186f, 0.011453429f,
-0.0016362892f, -0.014725727f, -0.027812643f, -0.040894791f, -0.053969935f,
-0.067035832f, -0.080090240f, -0.093130924f, -0.10615565f, -0.11916219f,
-0.13214831f, -0.14511178f, -0.15805040f, -0.17096193f, -0.18384418f,
-0.19669491f, -0.20951195f, -0.22229309f, -0.23503613f, -0.24773891f,
-0.26039925f, -0.27301496f, -0.28558388f, -0.29810387f, -0.31057280f,
-0.32298848f, -0.33534884f, -0.34765175f, -0.35989508f, -0.37207675f,
-0.38419467f, -0.39624676f, -0.40823093f, -0.42014518f, -0.43198743f,
-0.44375566f, -0.45544785f, -0.46706200f, -0.47859612f, -0.49004826f,
-0.50141639f, -0.51269865f, -0.52389306f, -0.53499764f, -0.54601061f,
-0.55693001f, -0.56775403f, -0.57848072f, -0.58910829f, -0.59963489f,
-0.61005878f, -0.62037814f, -0.63059121f, -0.64069623f, -0.65069145f,
-0.66057515f, -0.67034572f, -0.68000144f, -0.68954057f, -0.69896162f,
-0.70826286f, -0.71744281f, -0.72649974f, -0.73543227f, -0.74423873f,
-0.75291771f, -0.76146764f, -0.76988715f, -0.77817470f, -0.78632891f,
-0.79434842f, -0.80223179f, -0.80997771f, -0.81758487f, -0.82505190f,
-0.83237761f, -0.83956063f, -0.84659988f, -0.85349399f, -0.86024189f,
-0.86684239f, -0.87329435f, -0.87959671f, -0.88574833f, -0.89174819f,
-0.89759529f, -0.90328854f, -0.90882701f, -0.91420978f, -0.91943592f,
-0.92450452f, -0.92941469f, -0.93416560f, -0.93875647f, -0.94318646f,
-0.94745487f, -0.95156091f, -0.95550388f, -0.95928317f, -0.96289814f,
-0.96634805f, -0.96963239f, -0.97275060f, -0.97570217f, -0.97848648f,
-0.98110318f, -0.98355180f, -0.98583186f, -0.98794299f, -0.98988485f,
-0.99165714f, -0.99325943f, -0.99469161f, -0.99595332f, -0.99704438f,
-0.99796462f, -0.99871385f, -0.99929196f, -0.99969882f, -0.99993443f,
0.99999464f, 0.99956632f, 0.99845290f, 0.99665523f, 0.99417448f,
0.99101239f, 0.98717111f, 0.98265326f, 0.97746199f, 0.97160077f,
0.96507365f, 0.95788515f, 0.95004016f, 0.94154406f, 0.93240267f,
0.92262226f, 0.91220951f, 0.90117162f, 0.88951606f, 0.87725091f,
0.86438453f, 0.85092574f, 0.83688372f, 0.82226819f, 0.80708915f,
0.79135692f, 0.77508235f, 0.75827658f, 0.74095112f, 0.72311783f,
0.70478898f, 0.68597710f, 0.66669506f, 0.64695615f, 0.62677377f,
0.60616189f, 0.58513457f, 0.56370622f, 0.54189157f, 0.51970547f,
0.49716324f, 0.47428027f, 0.45107225f, 0.42755505f, 0.40374488f,
0.37965798f, 0.35531086f, 0.33072025f, 0.30590299f, 0.28087607f,
0.25565663f, 0.23026201f, 0.20470956f, 0.17901683f, 0.15320139f,
0.12728097f, 0.10127331f, 0.075196236f, 0.049067631f, 0.022905400f,
-0.0032725304f, -0.029448219f, -0.055603724f, -0.081721120f, -0.10778251f,
-0.13377003f, -0.15966587f, -0.18545228f, -0.21111161f, -0.23662624f,
-0.26197869f, -0.28715160f, -0.31212771f, -0.33688989f, -0.36142120f,
-0.38570482f, -0.40972409f, -0.43346253f, -0.45690393f, -0.48003218f,
-0.50283146f, -0.52528608f, -0.54738069f, -0.56910020f, -0.59042966f,
-0.61135447f, -0.63186026f, -0.65193301f, -0.67155898f, -0.69072473f,
-0.70941705f, -0.72762316f, -0.74533063f, -0.76252723f, -0.77920127f,
-0.79534131f, -0.81093621f, -0.82597536f, -0.84044844f, -0.85434550f,
-0.86765707f, -0.88037395f, -0.89248747f, -0.90398932f, -0.91487163f,
-0.92512697f, -0.93474823f, -0.94372886f, -0.95206273f, -0.95974404f,
-0.96676767f, -0.97312868f, -0.97882277f, -0.98384601f, -0.98819500f,
-0.99186671f, -0.99485862f, -0.99716878f, -0.99879545f, -0.99973762f,
};
#endif
static const CELTMode mode48000_960_120 = {

View File

@ -0,0 +1,404 @@
/* The contents of this file was automatically generated by
* dump_mode_arm_ne10.c with arguments: 48000 960
* It contains static definitions for some pre-defined modes. */
#include <NE10_init.h>
#ifndef NE10_FFT_PARAMS48000_960
#define NE10_FFT_PARAMS48000_960
static const ne10_int32_t ne10_factors_480[64] = {
4, 40, 4, 30, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, };
static const ne10_int32_t ne10_factors_240[64] = {
3, 20, 4, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, };
static const ne10_int32_t ne10_factors_120[64] = {
3, 10, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, };
static const ne10_int32_t ne10_factors_60[64] = {
2, 5, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, };
static const ne10_fft_cpx_float32_t ne10_twiddles_480[480] = {
{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f},
{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f},
{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f},
{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f},
{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f},
{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f},
{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f},
{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f},
{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f},
{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f},
{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
{-4.3711388e-08f,-1.0000000f}, {-0.10452851f,-0.99452192f}, {-0.20791174f,-0.97814757f},
{-0.30901703f,-0.95105648f}, {-0.40673670f,-0.91354543f}, {-0.50000006f,-0.86602533f},
{-0.58778518f,-0.80901700f}, {-0.66913068f,-0.74314475f}, {-0.74314493f,-0.66913044f},
{-0.80901700f,-0.58778518f}, {-0.86602539f,-0.50000006f}, {-0.91354549f,-0.40673658f},
{-0.95105654f,-0.30901679f}, {-0.97814763f,-0.20791161f}, {-0.99452192f,-0.10452849f},
{1.0000000f,-0.0000000f}, {0.98768836f,-0.15643448f}, {0.95105648f,-0.30901700f},
{0.89100653f,-0.45399052f}, {0.80901700f,-0.58778524f}, {0.70710677f,-0.70710683f},
{0.58778524f,-0.80901700f}, {0.45399052f,-0.89100653f}, {0.30901697f,-0.95105654f},
{0.15643437f,-0.98768836f}, {-4.3711388e-08f,-1.0000000f}, {-0.15643445f,-0.98768836f},
{-0.30901703f,-0.95105648f}, {-0.45399061f,-0.89100647f}, {-0.58778518f,-0.80901700f},
{-0.70710677f,-0.70710677f}, {-0.80901700f,-0.58778518f}, {-0.89100659f,-0.45399037f},
{-0.95105654f,-0.30901679f}, {-0.98768836f,-0.15643445f}, {-1.0000000f,8.7422777e-08f},
{-0.98768830f,0.15643461f}, {-0.95105654f,0.30901697f}, {-0.89100653f,0.45399055f},
{-0.80901694f,0.58778536f}, {-0.70710665f,0.70710689f}, {-0.58778507f,0.80901712f},
{-0.45399022f,0.89100665f}, {-0.30901709f,0.95105648f}, {-0.15643452f,0.98768830f},
{1.0000000f,-0.0000000f}, {0.99991435f,-0.013089596f}, {0.99965733f,-0.026176950f},
{0.99922901f,-0.039259817f}, {0.99862951f,-0.052335959f}, {0.99785894f,-0.065403134f},
{0.99691731f,-0.078459099f}, {0.99580491f,-0.091501623f}, {0.99452192f,-0.10452846f},
{0.99306846f,-0.11753740f}, {0.99144489f,-0.13052620f}, {0.98965138f,-0.14349262f},
{0.98768836f,-0.15643448f}, {0.98555607f,-0.16934951f}, {0.98325491f,-0.18223552f},
{0.98078525f,-0.19509032f}, {0.97814763f,-0.20791170f}, {0.97534233f,-0.22069745f},
{0.97236991f,-0.23344538f}, {0.96923089f,-0.24615330f}, {0.96592581f,-0.25881904f},
{0.96245521f,-0.27144045f}, {0.95881975f,-0.28401536f}, {0.95501995f,-0.29654160f},
{0.95105648f,-0.30901700f}, {0.94693011f,-0.32143945f}, {0.94264150f,-0.33380687f},
{0.93819129f,-0.34611708f}, {0.93358040f,-0.35836795f}, {0.92880952f,-0.37055743f},
{0.92387956f,-0.38268346f}, {0.91879117f,-0.39474389f}, {0.91354543f,-0.40673664f},
{0.90814316f,-0.41865975f}, {0.90258527f,-0.43051112f}, {0.89687270f,-0.44228873f},
{0.89100653f,-0.45399052f}, {0.88498765f,-0.46561453f}, {0.87881708f,-0.47715878f},
{0.87249601f,-0.48862126f}, {0.86602545f,-0.50000000f}, {0.85940641f,-0.51129311f},
{0.85264015f,-0.52249855f}, {0.84572786f,-0.53361452f}, {0.83867055f,-0.54463905f},
{0.83146960f,-0.55557024f}, {0.82412618f,-0.56640625f}, {0.81664151f,-0.57714522f},
{0.80901700f,-0.58778524f}, {0.80125380f,-0.59832460f}, {0.79335332f,-0.60876143f},
{0.78531694f,-0.61909395f}, {0.77714598f,-0.62932038f}, {0.76884180f,-0.63943899f},
{0.76040596f,-0.64944810f}, {0.75183982f,-0.65934587f}, {0.74314475f,-0.66913062f},
{0.73432249f,-0.67880076f}, {0.72537434f,-0.68835455f}, {0.71630192f,-0.69779050f},
{0.70710677f,-0.70710683f}, {0.69779044f,-0.71630198f}, {0.68835455f,-0.72537440f},
{0.67880070f,-0.73432255f}, {0.66913056f,-0.74314487f}, {0.65934581f,-0.75183982f},
{0.64944804f,-0.76040596f}, {0.63943899f,-0.76884186f}, {0.62932038f,-0.77714598f},
{0.61909395f,-0.78531694f}, {0.60876137f,-0.79335338f}, {0.59832460f,-0.80125386f},
{0.58778524f,-0.80901700f}, {0.57714516f,-0.81664151f}, {0.56640625f,-0.82412618f},
{0.55557019f,-0.83146960f}, {0.54463899f,-0.83867055f}, {0.53361452f,-0.84572786f},
{0.52249849f,-0.85264015f}, {0.51129311f,-0.85940641f}, {0.49999997f,-0.86602545f},
{0.48862118f,-0.87249601f}, {0.47715876f,-0.87881708f}, {0.46561447f,-0.88498765f},
{0.45399052f,-0.89100653f}, {0.44228867f,-0.89687276f}, {0.43051103f,-0.90258533f},
{0.41865975f,-0.90814316f}, {0.40673661f,-0.91354549f}, {0.39474380f,-0.91879129f},
{0.38268343f,-0.92387956f}, {0.37055740f,-0.92880958f}, {0.35836786f,-0.93358046f},
{0.34611705f,-0.93819135f}, {0.33380681f,-0.94264150f}, {0.32143947f,-0.94693011f},
{0.30901697f,-0.95105654f}, {0.29654151f,-0.95501995f}, {0.28401533f,-0.95881975f},
{0.27144039f,-0.96245527f}, {0.25881907f,-0.96592581f}, {0.24615327f,-0.96923089f},
{0.23344530f,-0.97236991f}, {0.22069745f,-0.97534233f}, {0.20791166f,-0.97814763f},
{0.19509023f,-0.98078531f}, {0.18223552f,-0.98325491f}, {0.16934945f,-0.98555607f},
{0.15643437f,-0.98768836f}, {0.14349259f,-0.98965138f}, {0.13052613f,-0.99144489f},
{0.11753740f,-0.99306846f}, {0.10452842f,-0.99452192f}, {0.091501534f,-0.99580491f},
{0.078459084f,-0.99691731f}, {0.065403074f,-0.99785894f}, {0.052335974f,-0.99862951f},
{0.039259788f,-0.99922901f}, {0.026176875f,-0.99965733f}, {0.013089597f,-0.99991435f},
{1.0000000f,-0.0000000f}, {0.99965733f,-0.026176950f}, {0.99862951f,-0.052335959f},
{0.99691731f,-0.078459099f}, {0.99452192f,-0.10452846f}, {0.99144489f,-0.13052620f},
{0.98768836f,-0.15643448f}, {0.98325491f,-0.18223552f}, {0.97814763f,-0.20791170f},
{0.97236991f,-0.23344538f}, {0.96592581f,-0.25881904f}, {0.95881975f,-0.28401536f},
{0.95105648f,-0.30901700f}, {0.94264150f,-0.33380687f}, {0.93358040f,-0.35836795f},
{0.92387956f,-0.38268346f}, {0.91354543f,-0.40673664f}, {0.90258527f,-0.43051112f},
{0.89100653f,-0.45399052f}, {0.87881708f,-0.47715878f}, {0.86602545f,-0.50000000f},
{0.85264015f,-0.52249855f}, {0.83867055f,-0.54463905f}, {0.82412618f,-0.56640625f},
{0.80901700f,-0.58778524f}, {0.79335332f,-0.60876143f}, {0.77714598f,-0.62932038f},
{0.76040596f,-0.64944810f}, {0.74314475f,-0.66913062f}, {0.72537434f,-0.68835455f},
{0.70710677f,-0.70710683f}, {0.68835455f,-0.72537440f}, {0.66913056f,-0.74314487f},
{0.64944804f,-0.76040596f}, {0.62932038f,-0.77714598f}, {0.60876137f,-0.79335338f},
{0.58778524f,-0.80901700f}, {0.56640625f,-0.82412618f}, {0.54463899f,-0.83867055f},
{0.52249849f,-0.85264015f}, {0.49999997f,-0.86602545f}, {0.47715876f,-0.87881708f},
{0.45399052f,-0.89100653f}, {0.43051103f,-0.90258533f}, {0.40673661f,-0.91354549f},
{0.38268343f,-0.92387956f}, {0.35836786f,-0.93358046f}, {0.33380681f,-0.94264150f},
{0.30901697f,-0.95105654f}, {0.28401533f,-0.95881975f}, {0.25881907f,-0.96592581f},
{0.23344530f,-0.97236991f}, {0.20791166f,-0.97814763f}, {0.18223552f,-0.98325491f},
{0.15643437f,-0.98768836f}, {0.13052613f,-0.99144489f}, {0.10452842f,-0.99452192f},
{0.078459084f,-0.99691731f}, {0.052335974f,-0.99862951f}, {0.026176875f,-0.99965733f},
{-4.3711388e-08f,-1.0000000f}, {-0.026176963f,-0.99965733f}, {-0.052336060f,-0.99862951f},
{-0.078459173f,-0.99691731f}, {-0.10452851f,-0.99452192f}, {-0.13052621f,-0.99144489f},
{-0.15643445f,-0.98768836f}, {-0.18223560f,-0.98325491f}, {-0.20791174f,-0.97814757f},
{-0.23344538f,-0.97236991f}, {-0.25881916f,-0.96592581f}, {-0.28401542f,-0.95881969f},
{-0.30901703f,-0.95105648f}, {-0.33380687f,-0.94264150f}, {-0.35836795f,-0.93358040f},
{-0.38268352f,-0.92387950f}, {-0.40673670f,-0.91354543f}, {-0.43051112f,-0.90258527f},
{-0.45399061f,-0.89100647f}, {-0.47715873f,-0.87881708f}, {-0.50000006f,-0.86602533f},
{-0.52249867f,-0.85264009f}, {-0.54463905f,-0.83867055f}, {-0.56640631f,-0.82412612f},
{-0.58778518f,-0.80901700f}, {-0.60876143f,-0.79335332f}, {-0.62932050f,-0.77714586f},
{-0.64944804f,-0.76040596f}, {-0.66913068f,-0.74314475f}, {-0.68835467f,-0.72537428f},
{-0.70710677f,-0.70710677f}, {-0.72537446f,-0.68835449f}, {-0.74314493f,-0.66913044f},
{-0.76040596f,-0.64944804f}, {-0.77714604f,-0.62932026f}, {-0.79335332f,-0.60876143f},
{-0.80901700f,-0.58778518f}, {-0.82412624f,-0.56640613f}, {-0.83867055f,-0.54463899f},
{-0.85264021f,-0.52249849f}, {-0.86602539f,-0.50000006f}, {-0.87881714f,-0.47715873f},
{-0.89100659f,-0.45399037f}, {-0.90258527f,-0.43051112f}, {-0.91354549f,-0.40673658f},
{-0.92387956f,-0.38268328f}, {-0.93358040f,-0.35836792f}, {-0.94264150f,-0.33380675f},
{-0.95105654f,-0.30901679f}, {-0.95881975f,-0.28401530f}, {-0.96592587f,-0.25881892f},
{-0.97236991f,-0.23344538f}, {-0.97814763f,-0.20791161f}, {-0.98325491f,-0.18223536f},
{-0.98768836f,-0.15643445f}, {-0.99144489f,-0.13052608f}, {-0.99452192f,-0.10452849f},
{-0.99691737f,-0.078459039f}, {-0.99862957f,-0.052335810f}, {-0.99965733f,-0.026176952f},
{1.0000000f,-0.0000000f}, {0.99922901f,-0.039259817f}, {0.99691731f,-0.078459099f},
{0.99306846f,-0.11753740f}, {0.98768836f,-0.15643448f}, {0.98078525f,-0.19509032f},
{0.97236991f,-0.23344538f}, {0.96245521f,-0.27144045f}, {0.95105648f,-0.30901700f},
{0.93819129f,-0.34611708f}, {0.92387956f,-0.38268346f}, {0.90814316f,-0.41865975f},
{0.89100653f,-0.45399052f}, {0.87249601f,-0.48862126f}, {0.85264015f,-0.52249855f},
{0.83146960f,-0.55557024f}, {0.80901700f,-0.58778524f}, {0.78531694f,-0.61909395f},
{0.76040596f,-0.64944810f}, {0.73432249f,-0.67880076f}, {0.70710677f,-0.70710683f},
{0.67880070f,-0.73432255f}, {0.64944804f,-0.76040596f}, {0.61909395f,-0.78531694f},
{0.58778524f,-0.80901700f}, {0.55557019f,-0.83146960f}, {0.52249849f,-0.85264015f},
{0.48862118f,-0.87249601f}, {0.45399052f,-0.89100653f}, {0.41865975f,-0.90814316f},
{0.38268343f,-0.92387956f}, {0.34611705f,-0.93819135f}, {0.30901697f,-0.95105654f},
{0.27144039f,-0.96245527f}, {0.23344530f,-0.97236991f}, {0.19509023f,-0.98078531f},
{0.15643437f,-0.98768836f}, {0.11753740f,-0.99306846f}, {0.078459084f,-0.99691731f},
{0.039259788f,-0.99922901f}, {-4.3711388e-08f,-1.0000000f}, {-0.039259877f,-0.99922901f},
{-0.078459173f,-0.99691731f}, {-0.11753749f,-0.99306846f}, {-0.15643445f,-0.98768836f},
{-0.19509032f,-0.98078525f}, {-0.23344538f,-0.97236991f}, {-0.27144048f,-0.96245521f},
{-0.30901703f,-0.95105648f}, {-0.34611711f,-0.93819129f}, {-0.38268352f,-0.92387950f},
{-0.41865984f,-0.90814310f}, {-0.45399061f,-0.89100647f}, {-0.48862135f,-0.87249595f},
{-0.52249867f,-0.85264009f}, {-0.55557036f,-0.83146954f}, {-0.58778518f,-0.80901700f},
{-0.61909389f,-0.78531694f}, {-0.64944804f,-0.76040596f}, {-0.67880076f,-0.73432249f},
{-0.70710677f,-0.70710677f}, {-0.73432249f,-0.67880070f}, {-0.76040596f,-0.64944804f},
{-0.78531694f,-0.61909389f}, {-0.80901700f,-0.58778518f}, {-0.83146966f,-0.55557019f},
{-0.85264021f,-0.52249849f}, {-0.87249607f,-0.48862115f}, {-0.89100659f,-0.45399037f},
{-0.90814322f,-0.41865960f}, {-0.92387956f,-0.38268328f}, {-0.93819135f,-0.34611690f},
{-0.95105654f,-0.30901679f}, {-0.96245521f,-0.27144048f}, {-0.97236991f,-0.23344538f},
{-0.98078531f,-0.19509031f}, {-0.98768836f,-0.15643445f}, {-0.99306846f,-0.11753736f},
{-0.99691737f,-0.078459039f}, {-0.99922901f,-0.039259743f}, {-1.0000000f,8.7422777e-08f},
{-0.99922901f,0.039259918f}, {-0.99691731f,0.078459218f}, {-0.99306846f,0.11753753f},
{-0.98768830f,0.15643461f}, {-0.98078525f,0.19509049f}, {-0.97236985f,0.23344554f},
{-0.96245515f,0.27144065f}, {-0.95105654f,0.30901697f}, {-0.93819135f,0.34611705f},
{-0.92387956f,0.38268346f}, {-0.90814316f,0.41865975f}, {-0.89100653f,0.45399055f},
{-0.87249601f,0.48862129f}, {-0.85264015f,0.52249861f}, {-0.83146960f,0.55557030f},
{-0.80901694f,0.58778536f}, {-0.78531688f,0.61909401f}, {-0.76040590f,0.64944816f},
{-0.73432243f,0.67880082f}, {-0.70710665f,0.70710689f}, {-0.67880058f,0.73432261f},
{-0.64944792f,0.76040608f}, {-0.61909378f,0.78531706f}, {-0.58778507f,0.80901712f},
{-0.55557001f,0.83146977f}, {-0.52249837f,0.85264033f}, {-0.48862100f,0.87249613f},
{-0.45399022f,0.89100665f}, {-0.41865945f,0.90814328f}, {-0.38268313f,0.92387968f},
{-0.34611672f,0.93819147f}, {-0.30901709f,0.95105648f}, {-0.27144054f,0.96245521f},
{-0.23344545f,0.97236991f}, {-0.19509038f,0.98078525f}, {-0.15643452f,0.98768830f},
{-0.11753743f,0.99306846f}, {-0.078459114f,0.99691731f}, {-0.039259821f,0.99922901f},
};
static const ne10_fft_cpx_float32_t ne10_twiddles_240[240] = {
{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
{1.0000000f,-0.0000000f}, {0.95105648f,-0.30901700f}, {0.80901700f,-0.58778524f},
{0.58778524f,-0.80901700f}, {0.30901697f,-0.95105654f}, {-4.3711388e-08f,-1.0000000f},
{-0.30901703f,-0.95105648f}, {-0.58778518f,-0.80901700f}, {-0.80901700f,-0.58778518f},
{-0.95105654f,-0.30901679f}, {-1.0000000f,8.7422777e-08f}, {-0.95105654f,0.30901697f},
{-0.80901694f,0.58778536f}, {-0.58778507f,0.80901712f}, {-0.30901709f,0.95105648f},
{1.0000000f,-0.0000000f}, {0.99965733f,-0.026176950f}, {0.99862951f,-0.052335959f},
{0.99691731f,-0.078459099f}, {0.99452192f,-0.10452846f}, {0.99144489f,-0.13052620f},
{0.98768836f,-0.15643448f}, {0.98325491f,-0.18223552f}, {0.97814763f,-0.20791170f},
{0.97236991f,-0.23344538f}, {0.96592581f,-0.25881904f}, {0.95881975f,-0.28401536f},
{0.95105648f,-0.30901700f}, {0.94264150f,-0.33380687f}, {0.93358040f,-0.35836795f},
{0.92387956f,-0.38268346f}, {0.91354543f,-0.40673664f}, {0.90258527f,-0.43051112f},
{0.89100653f,-0.45399052f}, {0.87881708f,-0.47715878f}, {0.86602545f,-0.50000000f},
{0.85264015f,-0.52249855f}, {0.83867055f,-0.54463905f}, {0.82412618f,-0.56640625f},
{0.80901700f,-0.58778524f}, {0.79335332f,-0.60876143f}, {0.77714598f,-0.62932038f},
{0.76040596f,-0.64944810f}, {0.74314475f,-0.66913062f}, {0.72537434f,-0.68835455f},
{0.70710677f,-0.70710683f}, {0.68835455f,-0.72537440f}, {0.66913056f,-0.74314487f},
{0.64944804f,-0.76040596f}, {0.62932038f,-0.77714598f}, {0.60876137f,-0.79335338f},
{0.58778524f,-0.80901700f}, {0.56640625f,-0.82412618f}, {0.54463899f,-0.83867055f},
{0.52249849f,-0.85264015f}, {0.49999997f,-0.86602545f}, {0.47715876f,-0.87881708f},
{0.45399052f,-0.89100653f}, {0.43051103f,-0.90258533f}, {0.40673661f,-0.91354549f},
{0.38268343f,-0.92387956f}, {0.35836786f,-0.93358046f}, {0.33380681f,-0.94264150f},
{0.30901697f,-0.95105654f}, {0.28401533f,-0.95881975f}, {0.25881907f,-0.96592581f},
{0.23344530f,-0.97236991f}, {0.20791166f,-0.97814763f}, {0.18223552f,-0.98325491f},
{0.15643437f,-0.98768836f}, {0.13052613f,-0.99144489f}, {0.10452842f,-0.99452192f},
{0.078459084f,-0.99691731f}, {0.052335974f,-0.99862951f}, {0.026176875f,-0.99965733f},
{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f},
{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f},
{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f},
{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f},
{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f},
{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f},
{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f},
{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f},
{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f},
{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f},
{-4.3711388e-08f,-1.0000000f}, {-0.052336060f,-0.99862951f}, {-0.10452851f,-0.99452192f},
{-0.15643445f,-0.98768836f}, {-0.20791174f,-0.97814757f}, {-0.25881916f,-0.96592581f},
{-0.30901703f,-0.95105648f}, {-0.35836795f,-0.93358040f}, {-0.40673670f,-0.91354543f},
{-0.45399061f,-0.89100647f}, {-0.50000006f,-0.86602533f}, {-0.54463905f,-0.83867055f},
{-0.58778518f,-0.80901700f}, {-0.62932050f,-0.77714586f}, {-0.66913068f,-0.74314475f},
{-0.70710677f,-0.70710677f}, {-0.74314493f,-0.66913044f}, {-0.77714604f,-0.62932026f},
{-0.80901700f,-0.58778518f}, {-0.83867055f,-0.54463899f}, {-0.86602539f,-0.50000006f},
{-0.89100659f,-0.45399037f}, {-0.91354549f,-0.40673658f}, {-0.93358040f,-0.35836792f},
{-0.95105654f,-0.30901679f}, {-0.96592587f,-0.25881892f}, {-0.97814763f,-0.20791161f},
{-0.98768836f,-0.15643445f}, {-0.99452192f,-0.10452849f}, {-0.99862957f,-0.052335810f},
{1.0000000f,-0.0000000f}, {0.99691731f,-0.078459099f}, {0.98768836f,-0.15643448f},
{0.97236991f,-0.23344538f}, {0.95105648f,-0.30901700f}, {0.92387956f,-0.38268346f},
{0.89100653f,-0.45399052f}, {0.85264015f,-0.52249855f}, {0.80901700f,-0.58778524f},
{0.76040596f,-0.64944810f}, {0.70710677f,-0.70710683f}, {0.64944804f,-0.76040596f},
{0.58778524f,-0.80901700f}, {0.52249849f,-0.85264015f}, {0.45399052f,-0.89100653f},
{0.38268343f,-0.92387956f}, {0.30901697f,-0.95105654f}, {0.23344530f,-0.97236991f},
{0.15643437f,-0.98768836f}, {0.078459084f,-0.99691731f}, {-4.3711388e-08f,-1.0000000f},
{-0.078459173f,-0.99691731f}, {-0.15643445f,-0.98768836f}, {-0.23344538f,-0.97236991f},
{-0.30901703f,-0.95105648f}, {-0.38268352f,-0.92387950f}, {-0.45399061f,-0.89100647f},
{-0.52249867f,-0.85264009f}, {-0.58778518f,-0.80901700f}, {-0.64944804f,-0.76040596f},
{-0.70710677f,-0.70710677f}, {-0.76040596f,-0.64944804f}, {-0.80901700f,-0.58778518f},
{-0.85264021f,-0.52249849f}, {-0.89100659f,-0.45399037f}, {-0.92387956f,-0.38268328f},
{-0.95105654f,-0.30901679f}, {-0.97236991f,-0.23344538f}, {-0.98768836f,-0.15643445f},
{-0.99691737f,-0.078459039f}, {-1.0000000f,8.7422777e-08f}, {-0.99691731f,0.078459218f},
{-0.98768830f,0.15643461f}, {-0.97236985f,0.23344554f}, {-0.95105654f,0.30901697f},
{-0.92387956f,0.38268346f}, {-0.89100653f,0.45399055f}, {-0.85264015f,0.52249861f},
{-0.80901694f,0.58778536f}, {-0.76040590f,0.64944816f}, {-0.70710665f,0.70710689f},
{-0.64944792f,0.76040608f}, {-0.58778507f,0.80901712f}, {-0.52249837f,0.85264033f},
{-0.45399022f,0.89100665f}, {-0.38268313f,0.92387968f}, {-0.30901709f,0.95105648f},
{-0.23344545f,0.97236991f}, {-0.15643452f,0.98768830f}, {-0.078459114f,0.99691731f},
};
static const ne10_fft_cpx_float32_t ne10_twiddles_120[120] = {
{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f},
{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f},
{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f},
{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f},
{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f},
{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f},
{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f},
{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f},
{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f},
{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f},
{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
{-4.3711388e-08f,-1.0000000f}, {-0.10452851f,-0.99452192f}, {-0.20791174f,-0.97814757f},
{-0.30901703f,-0.95105648f}, {-0.40673670f,-0.91354543f}, {-0.50000006f,-0.86602533f},
{-0.58778518f,-0.80901700f}, {-0.66913068f,-0.74314475f}, {-0.74314493f,-0.66913044f},
{-0.80901700f,-0.58778518f}, {-0.86602539f,-0.50000006f}, {-0.91354549f,-0.40673658f},
{-0.95105654f,-0.30901679f}, {-0.97814763f,-0.20791161f}, {-0.99452192f,-0.10452849f},
{1.0000000f,-0.0000000f}, {0.98768836f,-0.15643448f}, {0.95105648f,-0.30901700f},
{0.89100653f,-0.45399052f}, {0.80901700f,-0.58778524f}, {0.70710677f,-0.70710683f},
{0.58778524f,-0.80901700f}, {0.45399052f,-0.89100653f}, {0.30901697f,-0.95105654f},
{0.15643437f,-0.98768836f}, {-4.3711388e-08f,-1.0000000f}, {-0.15643445f,-0.98768836f},
{-0.30901703f,-0.95105648f}, {-0.45399061f,-0.89100647f}, {-0.58778518f,-0.80901700f},
{-0.70710677f,-0.70710677f}, {-0.80901700f,-0.58778518f}, {-0.89100659f,-0.45399037f},
{-0.95105654f,-0.30901679f}, {-0.98768836f,-0.15643445f}, {-1.0000000f,8.7422777e-08f},
{-0.98768830f,0.15643461f}, {-0.95105654f,0.30901697f}, {-0.89100653f,0.45399055f},
{-0.80901694f,0.58778536f}, {-0.70710665f,0.70710689f}, {-0.58778507f,0.80901712f},
{-0.45399022f,0.89100665f}, {-0.30901709f,0.95105648f}, {-0.15643452f,0.98768830f},
};
static const ne10_fft_cpx_float32_t ne10_twiddles_60[60] = {
{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
{1.0000000f,-0.0000000f}, {0.95105648f,-0.30901700f}, {0.80901700f,-0.58778524f},
{0.58778524f,-0.80901700f}, {0.30901697f,-0.95105654f}, {-4.3711388e-08f,-1.0000000f},
{-0.30901703f,-0.95105648f}, {-0.58778518f,-0.80901700f}, {-0.80901700f,-0.58778518f},
{-0.95105654f,-0.30901679f}, {-1.0000000f,8.7422777e-08f}, {-0.95105654f,0.30901697f},
{-0.80901694f,0.58778536f}, {-0.58778507f,0.80901712f}, {-0.30901709f,0.95105648f},
};
static const ne10_fft_state_float32_t ne10_fft_state_float32_t_480 = {
120,
(ne10_int32_t *)ne10_factors_480,
(ne10_fft_cpx_float32_t *)ne10_twiddles_480,
NULL,
(ne10_fft_cpx_float32_t *)&ne10_twiddles_480[120],
/* is_forward_scaled = true */
(ne10_int32_t) 1,
/* is_backward_scaled = false */
(ne10_int32_t) 0,
};
static const arch_fft_state cfg_arch_480 = {
1,
(void *)&ne10_fft_state_float32_t_480,
};
static const ne10_fft_state_float32_t ne10_fft_state_float32_t_240 = {
60,
(ne10_int32_t *)ne10_factors_240,
(ne10_fft_cpx_float32_t *)ne10_twiddles_240,
NULL,
(ne10_fft_cpx_float32_t *)&ne10_twiddles_240[60],
/* is_forward_scaled = true */
(ne10_int32_t) 1,
/* is_backward_scaled = false */
(ne10_int32_t) 0,
};
static const arch_fft_state cfg_arch_240 = {
1,
(void *)&ne10_fft_state_float32_t_240,
};
static const ne10_fft_state_float32_t ne10_fft_state_float32_t_120 = {
30,
(ne10_int32_t *)ne10_factors_120,
(ne10_fft_cpx_float32_t *)ne10_twiddles_120,
NULL,
(ne10_fft_cpx_float32_t *)&ne10_twiddles_120[30],
/* is_forward_scaled = true */
(ne10_int32_t) 1,
/* is_backward_scaled = false */
(ne10_int32_t) 0,
};
static const arch_fft_state cfg_arch_120 = {
1,
(void *)&ne10_fft_state_float32_t_120,
};
static const ne10_fft_state_float32_t ne10_fft_state_float32_t_60 = {
15,
(ne10_int32_t *)ne10_factors_60,
(ne10_fft_cpx_float32_t *)ne10_twiddles_60,
NULL,
(ne10_fft_cpx_float32_t *)&ne10_twiddles_60[15],
/* is_forward_scaled = true */
(ne10_int32_t) 1,
/* is_backward_scaled = false */
(ne10_int32_t) 0,
};
static const arch_fft_state cfg_arch_60 = {
1,
(void *)&ne10_fft_state_float32_t_60,
};
#endif /* end NE10_FFT_PARAMS48000_960 */

View File

@ -37,19 +37,23 @@
#include "os_support.h"
#include "bands.h"
#include "rate.h"
#include "pitch.h"
#ifndef OVERRIDE_vq_exp_rotation1
static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
{
int i;
opus_val16 ms;
celt_norm *Xptr;
Xptr = X;
ms = NEG16(s);
for (i=0;i<len-stride;i++)
{
celt_norm x1, x2;
x1 = Xptr[0];
x2 = Xptr[stride];
Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
*Xptr++ = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15));
*Xptr++ = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
}
Xptr = &X[len-2*stride-1];
for (i=len-2*stride-1;i>=0;i--)
@ -57,10 +61,11 @@ static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_
celt_norm x1, x2;
x1 = Xptr[0];
x2 = Xptr[stride];
Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
*Xptr-- = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15));
*Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
}
}
#endif /* OVERRIDE_vq_exp_rotation1 */
static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
{
@ -91,7 +96,7 @@ static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int
}
/*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
extract_collapse_mask().*/
len /= stride;
len = celt_udiv(len, stride);
for (i=0;i<stride;i++)
{
if (dir < 0)
@ -140,53 +145,40 @@ static unsigned extract_collapse_mask(int *iy, int N, int B)
return 1;
/*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
exp_rotation().*/
N0 = N/B;
N0 = celt_udiv(N, B);
collapse_mask = 0;
i=0; do {
int j;
unsigned tmp=0;
j=0; do {
collapse_mask |= (iy[i*N0+j]!=0)<<i;
tmp |= iy[i*N0+j];
} while (++j<N0);
collapse_mask |= (tmp!=0)<<i;
} while (++i<B);
return collapse_mask;
}
unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
#ifdef RESYNTH
, opus_val16 gain
#endif
)
opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch)
{
VARDECL(celt_norm, y);
VARDECL(int, iy);
VARDECL(opus_val16, signx);
VARDECL(int, signx);
int i, j;
opus_val16 s;
int pulsesLeft;
opus_val32 sum;
opus_val32 xy;
opus_val16 yy;
unsigned collapse_mask;
SAVE_STACK;
celt_assert2(K>0, "alg_quant() needs at least one pulse");
celt_assert2(N>1, "alg_quant() needs at least two dimensions");
(void)arch;
ALLOC(y, N, celt_norm);
ALLOC(iy, N, int);
ALLOC(signx, N, opus_val16);
exp_rotation(X, N, 1, B, K, spread);
ALLOC(signx, N, int);
/* Get rid of the sign */
sum = 0;
j=0; do {
if (X[j]>0)
signx[j]=1;
else {
signx[j]=-1;
X[j]=-X[j];
}
signx[j] = X[j]<0;
/* OPT: Make sure the compiler doesn't use a branch on ABS16(). */
X[j] = ABS16(X[j]);
iy[j] = 0;
y[j] = 0;
} while (++j<N);
@ -218,7 +210,12 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
while (++j<N);
sum = QCONST16(1.f,14);
}
rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum)));
#ifdef FIXED_POINT
rcp = EXTRACT16(MULT16_32_Q16(K, celt_rcp(sum)));
#else
/* Using K+e with e < 1 guarantees we cannot get more than K pulses. */
rcp = EXTRACT16(MULT16_32_Q16(K+0.8, celt_rcp(sum)));
#endif
j=0; do {
#ifdef FIXED_POINT
/* It's really important to round *towards zero* here */
@ -233,7 +230,7 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
pulsesLeft -= iy[j];
} while (++j<N);
}
celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass");
celt_assert2(pulsesLeft>=0, "Allocated too many pulses in the quick pass");
/* This should never happen, but just in case it does (e.g. on silence)
we fill the first bin with pulses. */
@ -249,12 +246,12 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
pulsesLeft=0;
}
s = 1;
for (i=0;i<pulsesLeft;i++)
{
opus_val16 Rxy, Ryy;
int best_id;
opus_val32 best_num = -VERY_LARGE16;
opus_val16 best_den = 0;
opus_val32 best_num;
opus_val16 best_den;
#ifdef FIXED_POINT
int rshift;
#endif
@ -264,10 +261,23 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
best_id = 0;
/* The squared magnitude term gets added anyway, so we might as well
add it outside the loop */
yy = ADD32(yy, 1);
j=0;
yy = ADD16(yy, 1);
/* Calculations for position 0 are out of the loop, in part to reduce
mispredicted branches (since the if condition is usually false)
in the loop. */
/* Temporary sums of the new pulse(s) */
Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[0])),rshift));
/* We're multiplying y[j] by two so we don't have to do it here */
Ryy = ADD16(yy, y[0]);
/* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
Rxy is positive because the sign is pre-computed) */
Rxy = MULT16_16_Q15(Rxy,Rxy);
best_den = Ryy;
best_num = Rxy;
j=1;
do {
opus_val16 Rxy, Ryy;
/* Temporary sums of the new pulse(s) */
Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
/* We're multiplying y[j] by two so we don't have to do it here */
@ -278,8 +288,11 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
Rxy = MULT16_16_Q15(Rxy,Rxy);
/* The idea is to check for num/den >= best_num/best_den, but that way
we can do it without any division */
/* OPT: Make sure to use conditional moves here */
if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num))
/* OPT: It's not clear whether a cmov is faster than a branch here
since the condition is more often false than true and using
a cmov introduces data dependencies across iterations. The optimal
choice may be architecture-dependent. */
if (opus_unlikely(MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num)))
{
best_den = Ryy;
best_num = Rxy;
@ -294,23 +307,47 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
/* Only now that we've made the final choice, update y/iy */
/* Multiplying y[j] by 2 so we don't have to do it everywhere else */
y[best_id] += 2*s;
y[best_id] += 2;
iy[best_id]++;
}
/* Put the original sign back */
j=0;
do {
X[j] = MULT16_16(signx[j],X[j]);
if (signx[j] < 0)
iy[j] = -iy[j];
/*iy[j] = signx[j] ? -iy[j] : iy[j];*/
/* OPT: The is more likely to be compiled without a branch than the code above
but has the same performance otherwise. */
iy[j] = (iy[j]^-signx[j]) + signx[j];
} while (++j<N);
RESTORE_STACK;
return yy;
}
unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc,
opus_val16 gain, int resynth, int arch)
{
VARDECL(int, iy);
opus_val16 yy;
unsigned collapse_mask;
SAVE_STACK;
celt_assert2(K>0, "alg_quant() needs at least one pulse");
celt_assert2(N>1, "alg_quant() needs at least two dimensions");
/* Covers vectorization by up to 4. */
ALLOC(iy, N+3, int);
exp_rotation(X, N, 1, B, K, spread);
yy = op_pvq_search(X, iy, K, N, arch);
encode_pulses(iy, N, K, enc);
#ifdef RESYNTH
normalise_residual(iy, X, N, yy, gain);
exp_rotation(X, N, -1, B, K, spread);
#endif
if (resynth)
{
normalise_residual(iy, X, N, yy, gain);
exp_rotation(X, N, -1, B, K, spread);
}
collapse_mask = extract_collapse_mask(iy, N, B);
RESTORE_STACK;
@ -322,7 +359,6 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
ec_dec *dec, opus_val16 gain)
{
int i;
opus_val32 Ryy;
unsigned collapse_mask;
VARDECL(int, iy);
@ -331,12 +367,7 @@ unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
celt_assert2(K>0, "alg_unquant() needs at least one pulse");
celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
ALLOC(iy, N, int);
decode_pulses(iy, N, K, dec);
Ryy = 0;
i=0;
do {
Ryy = MAC16_16(Ryy, iy[i], iy[i]);
} while (++i < N);
Ryy = decode_pulses(iy, N, K, dec);
normalise_residual(iy, X, N, Ryy, gain);
exp_rotation(X, N, -1, B, K, spread);
collapse_mask = extract_collapse_mask(iy, N, B);
@ -344,21 +375,18 @@ unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
return collapse_mask;
}
void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
#ifndef OVERRIDE_renormalise_vector
void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch)
{
int i;
#ifdef FIXED_POINT
int k;
#endif
opus_val32 E = EPSILON;
opus_val32 E;
opus_val16 g;
opus_val32 t;
celt_norm *xptr = X;
for (i=0;i<N;i++)
{
E = MAC16_16(E, *xptr, *xptr);
xptr++;
}
celt_norm *xptr;
E = EPSILON + celt_inner_prod(X, X, N, arch);
#ifdef FIXED_POINT
k = celt_ilog2(E)>>1;
#endif
@ -373,8 +401,9 @@ void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
}
/*return celt_sqrt(E);*/
}
#endif /* OVERRIDE_renormalise_vector */
int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N)
int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch)
{
int i;
int itheta;
@ -393,14 +422,8 @@ int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N)
Eside = MAC16_16(Eside, s, s);
}
} else {
for (i=0;i<N;i++)
{
celt_norm m, s;
m = X[i];
s = Y[i];
Emid = MAC16_16(Emid, m, m);
Eside = MAC16_16(Eside, s, s);
}
Emid += celt_inner_prod(X, X, N, arch);
Eside += celt_inner_prod(Y, Y, N, arch);
}
mid = celt_sqrt(Emid);
side = celt_sqrt(Eside);
@ -408,7 +431,7 @@ int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N)
/* 0.63662 = 2/pi */
itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid));
#else
itheta = (int)floor(.5f+16384*0.63662f*atan2(side,mid));
itheta = (int)floor(.5f+16384*0.63662f*fast_atan2f(side,mid));
#endif
return itheta;

View File

@ -37,6 +37,21 @@
#include "entdec.h"
#include "modes.h"
#if (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(FIXED_POINT))
#include "x86/vq_sse.h"
#endif
#if defined(MIPSr1_ASM)
#include "mips/vq_mipsr1.h"
#endif
opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch);
#if !defined(OVERRIDE_OP_PVQ_SEARCH)
#define op_pvq_search(x, iy, K, N, arch) \
(op_pvq_search_c(x, iy, K, N, arch))
#endif
/** Algebraic pulse-vector quantiser. The signal x is replaced by the sum of
* the pitch and a combination of pulses such that its norm is still equal
* to 1. This is the function that will typically require the most CPU.
@ -46,12 +61,8 @@
* @param enc Entropy encoder state
* @ret A mask indicating which blocks in the band received pulses
*/
unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B,
ec_enc *enc
#ifdef RESYNTH
, opus_val16 gain
#endif
);
unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc,
opus_val16 gain, int resynth, int arch);
/** Algebraic pulse decoder
* @param X Decoded normalised spectrum (returned)
@ -63,8 +74,8 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B,
unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
ec_dec *dec, opus_val16 gain);
void renormalise_vector(celt_norm *X, int N, opus_val16 gain);
void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch);
int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N);
int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch);
#endif /* VQ_H */

View File

@ -0,0 +1,132 @@
/* Copyright (c) 2014, Cisco Systems, INC
Written by XiangMingZhu WeiZhou MinPeng YanWang
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <xmmintrin.h>
#include <emmintrin.h>
#include <smmintrin.h>
#include "celt_lpc.h"
#include "stack_alloc.h"
#include "mathops.h"
#include "pitch.h"
#include "x86cpu.h"
#if defined(FIXED_POINT)
void celt_fir_sse4_1(const opus_val16 *_x,
const opus_val16 *num,
opus_val16 *_y,
int N,
int ord,
opus_val16 *mem,
int arch)
{
int i,j;
VARDECL(opus_val16, rnum);
VARDECL(opus_val16, x);
__m128i vecNoA;
opus_int32 noA ;
SAVE_STACK;
ALLOC(rnum, ord, opus_val16);
ALLOC(x, N+ord, opus_val16);
for(i=0;i<ord;i++)
rnum[i] = num[ord-i-1];
for(i=0;i<ord;i++)
x[i] = mem[ord-i-1];
for (i=0;i<N-7;i+=8)
{
x[i+ord ]=_x[i ];
x[i+ord+1]=_x[i+1];
x[i+ord+2]=_x[i+2];
x[i+ord+3]=_x[i+3];
x[i+ord+4]=_x[i+4];
x[i+ord+5]=_x[i+5];
x[i+ord+6]=_x[i+6];
x[i+ord+7]=_x[i+7];
}
for (;i<N-3;i+=4)
{
x[i+ord ]=_x[i ];
x[i+ord+1]=_x[i+1];
x[i+ord+2]=_x[i+2];
x[i+ord+3]=_x[i+3];
}
for (;i<N;i++)
x[i+ord]=_x[i];
for(i=0;i<ord;i++)
mem[i] = _x[N-i-1];
#ifdef SMALL_FOOTPRINT
for (i=0;i<N;i++)
{
opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT);
for (j=0;j<ord;j++)
{
sum = MAC16_16(sum,rnum[j],x[i+j]);
}
_y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT));
}
#else
noA = EXTEND32(1) << SIG_SHIFT >> 1;
vecNoA = _mm_set_epi32(noA, noA, noA, noA);
for (i=0;i<N-3;i+=4)
{
opus_val32 sums[4] = {0};
__m128i vecSum, vecX;
xcorr_kernel(rnum, x+i, sums, ord, arch);
vecSum = _mm_loadu_si128((__m128i *)sums);
vecSum = _mm_add_epi32(vecSum, vecNoA);
vecSum = _mm_srai_epi32(vecSum, SIG_SHIFT);
vecX = OP_CVTEPI16_EPI32_M64(_x + i);
vecSum = _mm_add_epi32(vecSum, vecX);
vecSum = _mm_packs_epi32(vecSum, vecSum);
_mm_storel_epi64((__m128i *)(_y + i), vecSum);
}
for (;i<N;i++)
{
opus_val32 sum = 0;
for (j=0;j<ord;j++)
sum = MAC16_16(sum, rnum[j], x[i + j]);
_y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT)));
}
#endif
RESTORE_STACK;
}
#endif

View File

@ -0,0 +1,68 @@
/* Copyright (c) 2014, Cisco Systems, INC
Written by XiangMingZhu WeiZhou MinPeng YanWang
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef CELT_LPC_SSE_H
#define CELT_LPC_SSE_H
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)
#define OVERRIDE_CELT_FIR
void celt_fir_sse4_1(
const opus_val16 *x,
const opus_val16 *num,
opus_val16 *y,
int N,
int ord,
opus_val16 *mem,
int arch);
#if defined(OPUS_X86_PRESUME_SSE4_1)
#define celt_fir(x, num, y, N, ord, mem, arch) \
((void)arch, celt_fir_sse4_1(x, num, y, N, ord, mem, arch))
#else
extern void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *num,
opus_val16 *y,
int N,
int ord,
opus_val16 *mem,
int arch);
# define celt_fir(x, num, y, N, ord, mem, arch) \
((*CELT_FIR_IMPL[(arch) & OPUS_ARCHMASK])(x, num, y, N, ord, mem, arch))
#endif
#endif
#endif

View File

@ -0,0 +1,185 @@
/* Copyright (c) 2014, Cisco Systems, INC
Written by XiangMingZhu WeiZhou MinPeng YanWang
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "macros.h"
#include "celt_lpc.h"
#include "stack_alloc.h"
#include "mathops.h"
#include "pitch.h"
#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)
#include <xmmintrin.h>
#include "arch.h"
void xcorr_kernel_sse(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len)
{
int j;
__m128 xsum1, xsum2;
xsum1 = _mm_loadu_ps(sum);
xsum2 = _mm_setzero_ps();
for (j = 0; j < len-3; j += 4)
{
__m128 x0 = _mm_loadu_ps(x+j);
__m128 yj = _mm_loadu_ps(y+j);
__m128 y3 = _mm_loadu_ps(y+j+3);
xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x00),yj));
xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x55),
_mm_shuffle_ps(yj,y3,0x49)));
xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xaa),
_mm_shuffle_ps(yj,y3,0x9e)));
xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xff),y3));
}
if (j < len)
{
xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
if (++j < len)
{
xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
if (++j < len)
{
xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
}
}
}
_mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2));
}
void dual_inner_prod_sse(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
int N, opus_val32 *xy1, opus_val32 *xy2)
{
int i;
__m128 xsum1, xsum2;
xsum1 = _mm_setzero_ps();
xsum2 = _mm_setzero_ps();
for (i=0;i<N-3;i+=4)
{
__m128 xi = _mm_loadu_ps(x+i);
__m128 y1i = _mm_loadu_ps(y01+i);
__m128 y2i = _mm_loadu_ps(y02+i);
xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(xi, y1i));
xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(xi, y2i));
}
/* Horizontal sum */
xsum1 = _mm_add_ps(xsum1, _mm_movehl_ps(xsum1, xsum1));
xsum1 = _mm_add_ss(xsum1, _mm_shuffle_ps(xsum1, xsum1, 0x55));
_mm_store_ss(xy1, xsum1);
xsum2 = _mm_add_ps(xsum2, _mm_movehl_ps(xsum2, xsum2));
xsum2 = _mm_add_ss(xsum2, _mm_shuffle_ps(xsum2, xsum2, 0x55));
_mm_store_ss(xy2, xsum2);
for (;i<N;i++)
{
*xy1 = MAC16_16(*xy1, x[i], y01[i]);
*xy2 = MAC16_16(*xy2, x[i], y02[i]);
}
}
opus_val32 celt_inner_prod_sse(const opus_val16 *x, const opus_val16 *y,
int N)
{
int i;
float xy;
__m128 sum;
sum = _mm_setzero_ps();
/* FIXME: We should probably go 8-way and use 2 sums. */
for (i=0;i<N-3;i+=4)
{
__m128 xi = _mm_loadu_ps(x+i);
__m128 yi = _mm_loadu_ps(y+i);
sum = _mm_add_ps(sum,_mm_mul_ps(xi, yi));
}
/* Horizontal sum */
sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
_mm_store_ss(&xy, sum);
for (;i<N;i++)
{
xy = MAC16_16(xy, x[i], y[i]);
}
return xy;
}
void comb_filter_const_sse(opus_val32 *y, opus_val32 *x, int T, int N,
opus_val16 g10, opus_val16 g11, opus_val16 g12)
{
int i;
__m128 x0v;
__m128 g10v, g11v, g12v;
g10v = _mm_load1_ps(&g10);
g11v = _mm_load1_ps(&g11);
g12v = _mm_load1_ps(&g12);
x0v = _mm_loadu_ps(&x[-T-2]);
for (i=0;i<N-3;i+=4)
{
__m128 yi, yi2, x1v, x2v, x3v, x4v;
const opus_val32 *xp = &x[i-T-2];
yi = _mm_loadu_ps(x+i);
x4v = _mm_loadu_ps(xp+4);
#if 0
/* Slower version with all loads */
x1v = _mm_loadu_ps(xp+1);
x2v = _mm_loadu_ps(xp+2);
x3v = _mm_loadu_ps(xp+3);
#else
x2v = _mm_shuffle_ps(x0v, x4v, 0x4e);
x1v = _mm_shuffle_ps(x0v, x2v, 0x99);
x3v = _mm_shuffle_ps(x2v, x4v, 0x99);
#endif
yi = _mm_add_ps(yi, _mm_mul_ps(g10v,x2v));
#if 0 /* Set to 1 to make it bit-exact with the non-SSE version */
yi = _mm_add_ps(yi, _mm_mul_ps(g11v,_mm_add_ps(x3v,x1v)));
yi = _mm_add_ps(yi, _mm_mul_ps(g12v,_mm_add_ps(x4v,x0v)));
#else
/* Use partial sums */
yi2 = _mm_add_ps(_mm_mul_ps(g11v,_mm_add_ps(x3v,x1v)),
_mm_mul_ps(g12v,_mm_add_ps(x4v,x0v)));
yi = _mm_add_ps(yi, yi2);
#endif
x0v=x4v;
_mm_storeu_ps(y+i, yi);
}
#ifdef CUSTOM_MODES
for (;i<N;i++)
{
y[i] = x[i]
+ MULT16_32_Q15(g10,x[i-T])
+ MULT16_32_Q15(g11,ADD32(x[i-T+1],x[i-T-1]))
+ MULT16_32_Q15(g12,ADD32(x[i-T+2],x[i-T-2]));
}
#endif
}
#endif

View File

@ -1,4 +1,5 @@
/* Copyright (c) 2013 Jean-Marc Valin and John Ridges */
/* Copyright (c) 2013 Jean-Marc Valin and John Ridges
Copyright (c) 2014, Cisco Systems, INC MingXiang WeiZhou MinPeng YanWang*/
/**
@file pitch_sse.h
@brief Pitch analysis
@ -32,125 +33,160 @@
#ifndef PITCH_SSE_H
#define PITCH_SSE_H
#include <xmmintrin.h>
#include "arch.h"
#if defined(HAVE_CONFIG_H)
#include "config.h"
#endif
#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)
void xcorr_kernel_sse4_1(
const opus_int16 *x,
const opus_int16 *y,
opus_val32 sum[4],
int len);
#endif
#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)
void xcorr_kernel_sse(
const opus_val16 *x,
const opus_val16 *y,
opus_val32 sum[4],
int len);
#endif
#if defined(OPUS_X86_PRESUME_SSE4_1) && defined(FIXED_POINT)
#define OVERRIDE_XCORR_KERNEL
#define xcorr_kernel(x, y, sum, len, arch) \
((void)arch, xcorr_kernel_sse4_1(x, y, sum, len))
#elif defined(OPUS_X86_PRESUME_SSE) && !defined(FIXED_POINT)
#define OVERRIDE_XCORR_KERNEL
#define xcorr_kernel(x, y, sum, len, arch) \
((void)arch, xcorr_kernel_sse(x, y, sum, len))
#elif (defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)) || (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT))
extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *y,
opus_val32 sum[4],
int len);
#define OVERRIDE_XCORR_KERNEL
static OPUS_INLINE void xcorr_kernel(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len)
{
int j;
__m128 xsum1, xsum2;
xsum1 = _mm_loadu_ps(sum);
xsum2 = _mm_setzero_ps();
#define xcorr_kernel(x, y, sum, len, arch) \
((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len))
for (j = 0; j < len-3; j += 4)
{
__m128 x0 = _mm_loadu_ps(x+j);
__m128 yj = _mm_loadu_ps(y+j);
__m128 y3 = _mm_loadu_ps(y+j+3);
#endif
xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x00),yj));
xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x55),
_mm_shuffle_ps(yj,y3,0x49)));
xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xaa),
_mm_shuffle_ps(yj,y3,0x9e)));
xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xff),y3));
}
if (j < len)
{
xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
if (++j < len)
{
xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
if (++j < len)
{
xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
}
}
}
_mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2));
}
#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)
opus_val32 celt_inner_prod_sse4_1(
const opus_int16 *x,
const opus_int16 *y,
int N);
#endif
#if defined(OPUS_X86_MAY_HAVE_SSE2) && defined(FIXED_POINT)
opus_val32 celt_inner_prod_sse2(
const opus_int16 *x,
const opus_int16 *y,
int N);
#endif
#if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(FIXED_POINT)
opus_val32 celt_inner_prod_sse(
const opus_val16 *x,
const opus_val16 *y,
int N);
#endif
#if defined(OPUS_X86_PRESUME_SSE4_1) && defined(FIXED_POINT)
#define OVERRIDE_CELT_INNER_PROD
#define celt_inner_prod(x, y, N, arch) \
((void)arch, celt_inner_prod_sse4_1(x, y, N))
#elif defined(OPUS_X86_PRESUME_SSE2) && defined(FIXED_POINT) && !defined(OPUS_X86_MAY_HAVE_SSE4_1)
#define OVERRIDE_CELT_INNER_PROD
#define celt_inner_prod(x, y, N, arch) \
((void)arch, celt_inner_prod_sse2(x, y, N))
#elif defined(OPUS_X86_PRESUME_SSE) && !defined(FIXED_POINT)
#define OVERRIDE_CELT_INNER_PROD
#define celt_inner_prod(x, y, N, arch) \
((void)arch, celt_inner_prod_sse(x, y, N))
#elif ((defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)) && defined(FIXED_POINT)) || \
(defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT))
extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *y,
int N);
#define OVERRIDE_CELT_INNER_PROD
#define celt_inner_prod(x, y, N, arch) \
((*CELT_INNER_PROD_IMPL[(arch) & OPUS_ARCHMASK])(x, y, N))
#endif
#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)
#define OVERRIDE_DUAL_INNER_PROD
static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
int N, opus_val32 *xy1, opus_val32 *xy2)
{
int i;
__m128 xsum1, xsum2;
xsum1 = _mm_setzero_ps();
xsum2 = _mm_setzero_ps();
for (i=0;i<N-3;i+=4)
{
__m128 xi = _mm_loadu_ps(x+i);
__m128 y1i = _mm_loadu_ps(y01+i);
__m128 y2i = _mm_loadu_ps(y02+i);
xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(xi, y1i));
xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(xi, y2i));
}
/* Horizontal sum */
xsum1 = _mm_add_ps(xsum1, _mm_movehl_ps(xsum1, xsum1));
xsum1 = _mm_add_ss(xsum1, _mm_shuffle_ps(xsum1, xsum1, 0x55));
_mm_store_ss(xy1, xsum1);
xsum2 = _mm_add_ps(xsum2, _mm_movehl_ps(xsum2, xsum2));
xsum2 = _mm_add_ss(xsum2, _mm_shuffle_ps(xsum2, xsum2, 0x55));
_mm_store_ss(xy2, xsum2);
for (;i<N;i++)
{
*xy1 = MAC16_16(*xy1, x[i], y01[i]);
*xy2 = MAC16_16(*xy2, x[i], y02[i]);
}
}
#define OVERRIDE_COMB_FILTER_CONST
static OPUS_INLINE void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
opus_val16 g10, opus_val16 g11, opus_val16 g12)
{
int i;
__m128 x0v;
__m128 g10v, g11v, g12v;
g10v = _mm_load1_ps(&g10);
g11v = _mm_load1_ps(&g11);
g12v = _mm_load1_ps(&g12);
x0v = _mm_loadu_ps(&x[-T-2]);
for (i=0;i<N-3;i+=4)
{
__m128 yi, yi2, x1v, x2v, x3v, x4v;
const opus_val32 *xp = &x[i-T-2];
yi = _mm_loadu_ps(x+i);
x4v = _mm_loadu_ps(xp+4);
#if 0
/* Slower version with all loads */
x1v = _mm_loadu_ps(xp+1);
x2v = _mm_loadu_ps(xp+2);
x3v = _mm_loadu_ps(xp+3);
#else
x2v = _mm_shuffle_ps(x0v, x4v, 0x4e);
x1v = _mm_shuffle_ps(x0v, x2v, 0x99);
x3v = _mm_shuffle_ps(x2v, x4v, 0x99);
#endif
yi = _mm_add_ps(yi, _mm_mul_ps(g10v,x2v));
#if 0 /* Set to 1 to make it bit-exact with the non-SSE version */
yi = _mm_add_ps(yi, _mm_mul_ps(g11v,_mm_add_ps(x3v,x1v)));
yi = _mm_add_ps(yi, _mm_mul_ps(g12v,_mm_add_ps(x4v,x0v)));
#undef dual_inner_prod
#undef comb_filter_const
void dual_inner_prod_sse(const opus_val16 *x,
const opus_val16 *y01,
const opus_val16 *y02,
int N,
opus_val32 *xy1,
opus_val32 *xy2);
void comb_filter_const_sse(opus_val32 *y,
opus_val32 *x,
int T,
int N,
opus_val16 g10,
opus_val16 g11,
opus_val16 g12);
#if defined(OPUS_X86_PRESUME_SSE)
# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \
((void)(arch),dual_inner_prod_sse(x, y01, y02, N, xy1, xy2))
# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
((void)(arch),comb_filter_const_sse(y, x, T, N, g10, g11, g12))
#else
/* Use partial sums */
yi2 = _mm_add_ps(_mm_mul_ps(g11v,_mm_add_ps(x3v,x1v)),
_mm_mul_ps(g12v,_mm_add_ps(x4v,x0v)));
yi = _mm_add_ps(yi, yi2);
#endif
x0v=x4v;
_mm_storeu_ps(y+i, yi);
}
#ifdef CUSTOM_MODES
for (;i<N;i++)
{
y[i] = x[i]
+ MULT16_32_Q15(g10,x[i-T])
+ MULT16_32_Q15(g11,ADD32(x[i-T+1],x[i-T-1]))
+ MULT16_32_Q15(g12,ADD32(x[i-T+2],x[i-T-2]));
}
#endif
}
extern void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *y01,
const opus_val16 *y02,
int N,
opus_val32 *xy1,
opus_val32 *xy2);
#define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \
((*DUAL_INNER_PROD_IMPL[(arch) & OPUS_ARCHMASK])(x, y01, y02, N, xy1, xy2))
extern void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])(
opus_val32 *y,
opus_val32 *x,
int T,
int N,
opus_val16 g10,
opus_val16 g11,
opus_val16 g12);
#define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
((*COMB_FILTER_CONST_IMPL[(arch) & OPUS_ARCHMASK])(y, x, T, N, g10, g11, g12))
#define NON_STATIC_COMB_FILTER_CONST_C
#endif
#endif
#endif

View File

@ -0,0 +1,95 @@
/* Copyright (c) 2014, Cisco Systems, INC
Written by XiangMingZhu WeiZhou MinPeng YanWang
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <xmmintrin.h>
#include <emmintrin.h>
#include "macros.h"
#include "celt_lpc.h"
#include "stack_alloc.h"
#include "mathops.h"
#include "pitch.h"
#if defined(OPUS_X86_MAY_HAVE_SSE2) && defined(FIXED_POINT)
opus_val32 celt_inner_prod_sse2(const opus_val16 *x, const opus_val16 *y,
int N)
{
opus_int i, dataSize16;
opus_int32 sum;
__m128i inVec1_76543210, inVec1_FEDCBA98, acc1;
__m128i inVec2_76543210, inVec2_FEDCBA98, acc2;
sum = 0;
dataSize16 = N & ~15;
acc1 = _mm_setzero_si128();
acc2 = _mm_setzero_si128();
for (i=0;i<dataSize16;i+=16)
{
inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0]));
inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0]));
inVec1_FEDCBA98 = _mm_loadu_si128((__m128i *)(&x[i + 8]));
inVec2_FEDCBA98 = _mm_loadu_si128((__m128i *)(&y[i + 8]));
inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210);
inVec1_FEDCBA98 = _mm_madd_epi16(inVec1_FEDCBA98, inVec2_FEDCBA98);
acc1 = _mm_add_epi32(acc1, inVec1_76543210);
acc2 = _mm_add_epi32(acc2, inVec1_FEDCBA98);
}
acc1 = _mm_add_epi32( acc1, acc2 );
if (N - i >= 8)
{
inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0]));
inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0]));
inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210);
acc1 = _mm_add_epi32(acc1, inVec1_76543210);
i += 8;
}
acc1 = _mm_add_epi32(acc1, _mm_unpackhi_epi64( acc1, acc1));
acc1 = _mm_add_epi32(acc1, _mm_shufflelo_epi16( acc1, 0x0E));
sum += _mm_cvtsi128_si32(acc1);
for (;i<N;i++) {
sum = silk_SMLABB(sum, x[i], y[i]);
}
return sum;
}
#endif

View File

@ -0,0 +1,195 @@
/* Copyright (c) 2014, Cisco Systems, INC
Written by XiangMingZhu WeiZhou MinPeng YanWang
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <xmmintrin.h>
#include <emmintrin.h>
#include "macros.h"
#include "celt_lpc.h"
#include "stack_alloc.h"
#include "mathops.h"
#include "pitch.h"
#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)
#include <smmintrin.h>
#include "x86cpu.h"
opus_val32 celt_inner_prod_sse4_1(const opus_val16 *x, const opus_val16 *y,
int N)
{
opus_int i, dataSize16;
opus_int32 sum;
__m128i inVec1_76543210, inVec1_FEDCBA98, acc1;
__m128i inVec2_76543210, inVec2_FEDCBA98, acc2;
__m128i inVec1_3210, inVec2_3210;
sum = 0;
dataSize16 = N & ~15;
acc1 = _mm_setzero_si128();
acc2 = _mm_setzero_si128();
for (i=0;i<dataSize16;i+=16) {
inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0]));
inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0]));
inVec1_FEDCBA98 = _mm_loadu_si128((__m128i *)(&x[i + 8]));
inVec2_FEDCBA98 = _mm_loadu_si128((__m128i *)(&y[i + 8]));
inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210);
inVec1_FEDCBA98 = _mm_madd_epi16(inVec1_FEDCBA98, inVec2_FEDCBA98);
acc1 = _mm_add_epi32(acc1, inVec1_76543210);
acc2 = _mm_add_epi32(acc2, inVec1_FEDCBA98);
}
acc1 = _mm_add_epi32(acc1, acc2);
if (N - i >= 8)
{
inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0]));
inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0]));
inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210);
acc1 = _mm_add_epi32(acc1, inVec1_76543210);
i += 8;
}
if (N - i >= 4)
{
inVec1_3210 = OP_CVTEPI16_EPI32_M64(&x[i + 0]);
inVec2_3210 = OP_CVTEPI16_EPI32_M64(&y[i + 0]);
inVec1_3210 = _mm_mullo_epi32(inVec1_3210, inVec2_3210);
acc1 = _mm_add_epi32(acc1, inVec1_3210);
i += 4;
}
acc1 = _mm_add_epi32(acc1, _mm_unpackhi_epi64(acc1, acc1));
acc1 = _mm_add_epi32(acc1, _mm_shufflelo_epi16(acc1, 0x0E));
sum += _mm_cvtsi128_si32(acc1);
for (;i<N;i++)
{
sum = silk_SMLABB(sum, x[i], y[i]);
}
return sum;
}
void xcorr_kernel_sse4_1(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[ 4 ], int len)
{
int j;
__m128i vecX, vecX0, vecX1, vecX2, vecX3;
__m128i vecY0, vecY1, vecY2, vecY3;
__m128i sum0, sum1, sum2, sum3, vecSum;
__m128i initSum;
celt_assert(len >= 3);
sum0 = _mm_setzero_si128();
sum1 = _mm_setzero_si128();
sum2 = _mm_setzero_si128();
sum3 = _mm_setzero_si128();
for (j=0;j<(len-7);j+=8)
{
vecX = _mm_loadu_si128((__m128i *)(&x[j + 0]));
vecY0 = _mm_loadu_si128((__m128i *)(&y[j + 0]));
vecY1 = _mm_loadu_si128((__m128i *)(&y[j + 1]));
vecY2 = _mm_loadu_si128((__m128i *)(&y[j + 2]));
vecY3 = _mm_loadu_si128((__m128i *)(&y[j + 3]));
sum0 = _mm_add_epi32(sum0, _mm_madd_epi16(vecX, vecY0));
sum1 = _mm_add_epi32(sum1, _mm_madd_epi16(vecX, vecY1));
sum2 = _mm_add_epi32(sum2, _mm_madd_epi16(vecX, vecY2));
sum3 = _mm_add_epi32(sum3, _mm_madd_epi16(vecX, vecY3));
}
sum0 = _mm_add_epi32(sum0, _mm_unpackhi_epi64( sum0, sum0));
sum0 = _mm_add_epi32(sum0, _mm_shufflelo_epi16( sum0, 0x0E));
sum1 = _mm_add_epi32(sum1, _mm_unpackhi_epi64( sum1, sum1));
sum1 = _mm_add_epi32(sum1, _mm_shufflelo_epi16( sum1, 0x0E));
sum2 = _mm_add_epi32(sum2, _mm_unpackhi_epi64( sum2, sum2));
sum2 = _mm_add_epi32(sum2, _mm_shufflelo_epi16( sum2, 0x0E));
sum3 = _mm_add_epi32(sum3, _mm_unpackhi_epi64( sum3, sum3));
sum3 = _mm_add_epi32(sum3, _mm_shufflelo_epi16( sum3, 0x0E));
vecSum = _mm_unpacklo_epi64(_mm_unpacklo_epi32(sum0, sum1),
_mm_unpacklo_epi32(sum2, sum3));
for (;j<(len-3);j+=4)
{
vecX = OP_CVTEPI16_EPI32_M64(&x[j + 0]);
vecX0 = _mm_shuffle_epi32(vecX, 0x00);
vecX1 = _mm_shuffle_epi32(vecX, 0x55);
vecX2 = _mm_shuffle_epi32(vecX, 0xaa);
vecX3 = _mm_shuffle_epi32(vecX, 0xff);
vecY0 = OP_CVTEPI16_EPI32_M64(&y[j + 0]);
vecY1 = OP_CVTEPI16_EPI32_M64(&y[j + 1]);
vecY2 = OP_CVTEPI16_EPI32_M64(&y[j + 2]);
vecY3 = OP_CVTEPI16_EPI32_M64(&y[j + 3]);
sum0 = _mm_mullo_epi32(vecX0, vecY0);
sum1 = _mm_mullo_epi32(vecX1, vecY1);
sum2 = _mm_mullo_epi32(vecX2, vecY2);
sum3 = _mm_mullo_epi32(vecX3, vecY3);
sum0 = _mm_add_epi32(sum0, sum1);
sum2 = _mm_add_epi32(sum2, sum3);
vecSum = _mm_add_epi32(vecSum, sum0);
vecSum = _mm_add_epi32(vecSum, sum2);
}
for (;j<len;j++)
{
vecX = OP_CVTEPI16_EPI32_M64(&x[j + 0]);
vecX0 = _mm_shuffle_epi32(vecX, 0x00);
vecY0 = OP_CVTEPI16_EPI32_M64(&y[j + 0]);
sum0 = _mm_mullo_epi32(vecX0, vecY0);
vecSum = _mm_add_epi32(vecSum, sum0);
}
initSum = _mm_loadu_si128((__m128i *)(&sum[0]));
initSum = _mm_add_epi32(initSum, vecSum);
_mm_storeu_si128((__m128i *)sum, initSum);
}
#endif

View File

@ -0,0 +1,50 @@
/* Copyright (c) 2016 Jean-Marc Valin */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef VQ_SSE_H
#define VQ_SSE_H
#if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(FIXED_POINT)
#define OVERRIDE_OP_PVQ_SEARCH
opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch);
#if defined(OPUS_X86_PRESUME_SSE2)
#define op_pvq_search(x, iy, K, N, arch) \
(op_pvq_search_sse2(x, iy, K, N, arch))
#else
extern opus_val16 (*const OP_PVQ_SEARCH_IMPL[OPUS_ARCHMASK + 1])(
celt_norm *_X, int *iy, int K, int N, int arch);
# define op_pvq_search(X, iy, K, N, arch) \
((*OP_PVQ_SEARCH_IMPL[(arch) & OPUS_ARCHMASK])(X, iy, K, N, arch))
#endif
#endif
#endif

View File

@ -0,0 +1,218 @@
/* Copyright (c) 2007-2008 CSIRO
Copyright (c) 2007-2009 Xiph.Org Foundation
Copyright (c) 2007-2016 Jean-Marc Valin */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <xmmintrin.h>
#include <emmintrin.h>
#include "celt_lpc.h"
#include "stack_alloc.h"
#include "mathops.h"
#include "vq.h"
#include "x86cpu.h"
#ifndef FIXED_POINT
opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch)
{
int i, j;
int pulsesLeft;
float xy, yy;
VARDECL(celt_norm, y);
VARDECL(celt_norm, X);
VARDECL(float, signy);
__m128 signmask;
__m128 sums;
__m128i fours;
SAVE_STACK;
(void)arch;
/* All bits set to zero, except for the sign bit. */
signmask = _mm_set_ps1(-0.f);
fours = _mm_set_epi32(4, 4, 4, 4);
ALLOC(y, N+3, celt_norm);
ALLOC(X, N+3, celt_norm);
ALLOC(signy, N+3, float);
OPUS_COPY(X, _X, N);
X[N] = X[N+1] = X[N+2] = 0;
sums = _mm_setzero_ps();
for (j=0;j<N;j+=4)
{
__m128 x4, s4;
x4 = _mm_loadu_ps(&X[j]);
s4 = _mm_cmplt_ps(x4, _mm_setzero_ps());
/* Get rid of the sign */
x4 = _mm_andnot_ps(signmask, x4);
sums = _mm_add_ps(sums, x4);
/* Clear y and iy in case we don't do the projection. */
_mm_storeu_ps(&y[j], _mm_setzero_ps());
_mm_storeu_si128((__m128i*)&iy[j], _mm_setzero_si128());
_mm_storeu_ps(&X[j], x4);
_mm_storeu_ps(&signy[j], s4);
}
sums = _mm_add_ps(sums, _mm_shuffle_ps(sums, sums, _MM_SHUFFLE(1, 0, 3, 2)));
sums = _mm_add_ps(sums, _mm_shuffle_ps(sums, sums, _MM_SHUFFLE(2, 3, 0, 1)));
xy = yy = 0;
pulsesLeft = K;
/* Do a pre-search by projecting on the pyramid */
if (K > (N>>1))
{
__m128i pulses_sum;
__m128 yy4, xy4;
__m128 rcp4;
opus_val32 sum = _mm_cvtss_f32(sums);
/* If X is too small, just replace it with a pulse at 0 */
/* Prevents infinities and NaNs from causing too many pulses
to be allocated. 64 is an approximation of infinity here. */
if (!(sum > EPSILON && sum < 64))
{
X[0] = QCONST16(1.f,14);
j=1; do
X[j]=0;
while (++j<N);
sums = _mm_set_ps1(1.f);
}
/* Using K+e with e < 1 guarantees we cannot get more than K pulses. */
rcp4 = _mm_mul_ps(_mm_set_ps1((float)(K+.8)), _mm_rcp_ps(sums));
xy4 = yy4 = _mm_setzero_ps();
pulses_sum = _mm_setzero_si128();
for (j=0;j<N;j+=4)
{
__m128 rx4, x4, y4;
__m128i iy4;
x4 = _mm_loadu_ps(&X[j]);
rx4 = _mm_mul_ps(x4, rcp4);
iy4 = _mm_cvttps_epi32(rx4);
pulses_sum = _mm_add_epi32(pulses_sum, iy4);
_mm_storeu_si128((__m128i*)&iy[j], iy4);
y4 = _mm_cvtepi32_ps(iy4);
xy4 = _mm_add_ps(xy4, _mm_mul_ps(x4, y4));
yy4 = _mm_add_ps(yy4, _mm_mul_ps(y4, y4));
/* double the y[] vector so we don't have to do it in the search loop. */
_mm_storeu_ps(&y[j], _mm_add_ps(y4, y4));
}
pulses_sum = _mm_add_epi32(pulses_sum, _mm_shuffle_epi32(pulses_sum, _MM_SHUFFLE(1, 0, 3, 2)));
pulses_sum = _mm_add_epi32(pulses_sum, _mm_shuffle_epi32(pulses_sum, _MM_SHUFFLE(2, 3, 0, 1)));
pulsesLeft -= _mm_cvtsi128_si32(pulses_sum);
xy4 = _mm_add_ps(xy4, _mm_shuffle_ps(xy4, xy4, _MM_SHUFFLE(1, 0, 3, 2)));
xy4 = _mm_add_ps(xy4, _mm_shuffle_ps(xy4, xy4, _MM_SHUFFLE(2, 3, 0, 1)));
xy = _mm_cvtss_f32(xy4);
yy4 = _mm_add_ps(yy4, _mm_shuffle_ps(yy4, yy4, _MM_SHUFFLE(1, 0, 3, 2)));
yy4 = _mm_add_ps(yy4, _mm_shuffle_ps(yy4, yy4, _MM_SHUFFLE(2, 3, 0, 1)));
yy = _mm_cvtss_f32(yy4);
}
X[N] = X[N+1] = X[N+2] = -100;
y[N] = y[N+1] = y[N+2] = 100;
celt_assert2(pulsesLeft>=0, "Allocated too many pulses in the quick pass");
/* This should never happen, but just in case it does (e.g. on silence)
we fill the first bin with pulses. */
if (pulsesLeft > N+3)
{
opus_val16 tmp = (opus_val16)pulsesLeft;
yy = MAC16_16(yy, tmp, tmp);
yy = MAC16_16(yy, tmp, y[0]);
iy[0] += pulsesLeft;
pulsesLeft=0;
}
for (i=0;i<pulsesLeft;i++)
{
int best_id;
__m128 xy4, yy4;
__m128 max, max2;
__m128i count;
__m128i pos;
best_id = 0;
/* The squared magnitude term gets added anyway, so we might as well
add it outside the loop */
yy = ADD16(yy, 1);
xy4 = _mm_load1_ps(&xy);
yy4 = _mm_load1_ps(&yy);
max = _mm_setzero_ps();
pos = _mm_setzero_si128();
count = _mm_set_epi32(3, 2, 1, 0);
for (j=0;j<N;j+=4)
{
__m128 x4, y4, r4;
x4 = _mm_loadu_ps(&X[j]);
y4 = _mm_loadu_ps(&y[j]);
x4 = _mm_add_ps(x4, xy4);
y4 = _mm_add_ps(y4, yy4);
y4 = _mm_rsqrt_ps(y4);
r4 = _mm_mul_ps(x4, y4);
/* Update the index of the max. */
pos = _mm_max_epi16(pos, _mm_and_si128(count, _mm_castps_si128(_mm_cmpgt_ps(r4, max))));
/* Update the max. */
max = _mm_max_ps(max, r4);
/* Update the indices (+4) */
count = _mm_add_epi32(count, fours);
}
/* Horizontal max */
max2 = _mm_max_ps(max, _mm_shuffle_ps(max, max, _MM_SHUFFLE(1, 0, 3, 2)));
max2 = _mm_max_ps(max2, _mm_shuffle_ps(max2, max2, _MM_SHUFFLE(2, 3, 0, 1)));
/* Now that max2 contains the max at all positions, look at which value(s) of the
partial max is equal to the global max. */
pos = _mm_and_si128(pos, _mm_castps_si128(_mm_cmpeq_ps(max, max2)));
pos = _mm_max_epi16(pos, _mm_unpackhi_epi64(pos, pos));
pos = _mm_max_epi16(pos, _mm_shufflelo_epi16(pos, _MM_SHUFFLE(1, 0, 3, 2)));
best_id = _mm_cvtsi128_si32(pos);
/* Updating the sums of the new pulse(s) */
xy = ADD32(xy, EXTEND32(X[best_id]));
/* We're multiplying y[j] by two so we don't have to do it here */
yy = ADD16(yy, y[best_id]);
/* Only now that we've made the final choice, update y/iy */
/* Multiplying y[j] by 2 so we don't have to do it everywhere else */
y[best_id] += 2;
iy[best_id]++;
}
/* Put the original sign back */
for (j=0;j<N;j+=4)
{
__m128i y4;
__m128i s4;
y4 = _mm_loadu_si128((__m128i*)&iy[j]);
s4 = _mm_castps_si128(_mm_loadu_ps(&signy[j]));
y4 = _mm_xor_si128(_mm_add_epi32(y4, s4), s4);
_mm_storeu_si128((__m128i*)&iy[j], y4);
}
RESTORE_STACK;
return yy;
}
#endif

View File

@ -0,0 +1,168 @@
/* Copyright (c) 2014, Cisco Systems, INC
Written by XiangMingZhu WeiZhou MinPeng YanWang
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(HAVE_CONFIG_H)
#include "config.h"
#endif
#include "x86/x86cpu.h"
#include "celt_lpc.h"
#include "pitch.h"
#include "pitch_sse.h"
#include "vq.h"
#if defined(OPUS_HAVE_RTCD)
# if defined(FIXED_POINT)
#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)
void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *num,
opus_val16 *y,
int N,
int ord,
opus_val16 *mem,
int arch
) = {
celt_fir_c, /* non-sse */
celt_fir_c,
celt_fir_c,
MAY_HAVE_SSE4_1(celt_fir), /* sse4.1 */
MAY_HAVE_SSE4_1(celt_fir) /* avx */
};
void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *y,
opus_val32 sum[4],
int len
) = {
xcorr_kernel_c, /* non-sse */
xcorr_kernel_c,
xcorr_kernel_c,
MAY_HAVE_SSE4_1(xcorr_kernel), /* sse4.1 */
MAY_HAVE_SSE4_1(xcorr_kernel) /* avx */
};
#endif
#if (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
(!defined(OPUS_X86_MAY_HAVE_SSE_4_1) && defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2))
opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *y,
int N
) = {
celt_inner_prod_c, /* non-sse */
celt_inner_prod_c,
MAY_HAVE_SSE2(celt_inner_prod),
MAY_HAVE_SSE4_1(celt_inner_prod), /* sse4.1 */
MAY_HAVE_SSE4_1(celt_inner_prod) /* avx */
};
#endif
# else
#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)
void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *y,
opus_val32 sum[4],
int len
) = {
xcorr_kernel_c, /* non-sse */
MAY_HAVE_SSE(xcorr_kernel),
MAY_HAVE_SSE(xcorr_kernel),
MAY_HAVE_SSE(xcorr_kernel),
MAY_HAVE_SSE(xcorr_kernel)
};
opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *y,
int N
) = {
celt_inner_prod_c, /* non-sse */
MAY_HAVE_SSE(celt_inner_prod),
MAY_HAVE_SSE(celt_inner_prod),
MAY_HAVE_SSE(celt_inner_prod),
MAY_HAVE_SSE(celt_inner_prod)
};
void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *y01,
const opus_val16 *y02,
int N,
opus_val32 *xy1,
opus_val32 *xy2
) = {
dual_inner_prod_c, /* non-sse */
MAY_HAVE_SSE(dual_inner_prod),
MAY_HAVE_SSE(dual_inner_prod),
MAY_HAVE_SSE(dual_inner_prod),
MAY_HAVE_SSE(dual_inner_prod)
};
void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])(
opus_val32 *y,
opus_val32 *x,
int T,
int N,
opus_val16 g10,
opus_val16 g11,
opus_val16 g12
) = {
comb_filter_const_c, /* non-sse */
MAY_HAVE_SSE(comb_filter_const),
MAY_HAVE_SSE(comb_filter_const),
MAY_HAVE_SSE(comb_filter_const),
MAY_HAVE_SSE(comb_filter_const)
};
#endif
#if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)
opus_val16 (*const OP_PVQ_SEARCH_IMPL[OPUS_ARCHMASK + 1])(
celt_norm *_X, int *iy, int K, int N, int arch
) = {
op_pvq_search_c, /* non-sse */
op_pvq_search_c,
MAY_HAVE_SSE2(op_pvq_search),
MAY_HAVE_SSE2(op_pvq_search),
MAY_HAVE_SSE2(op_pvq_search)
};
#endif
#endif
#endif

View File

@ -0,0 +1,157 @@
/* Copyright (c) 2014, Cisco Systems, INC
Written by XiangMingZhu WeiZhou MinPeng YanWang
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "cpu_support.h"
#include "macros.h"
#include "main.h"
#include "pitch.h"
#include "x86cpu.h"
#if (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
(defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
(defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))
#if defined(_MSC_VER)
#include <intrin.h>
static _inline void cpuid(unsigned int CPUInfo[4], unsigned int InfoType)
{
__cpuid((int*)CPUInfo, InfoType);
}
#else
#if defined(CPU_INFO_BY_C)
#include <cpuid.h>
#endif
static void cpuid(unsigned int CPUInfo[4], unsigned int InfoType)
{
#if defined(CPU_INFO_BY_ASM)
#if defined(__i386__) && defined(__PIC__)
/* %ebx is PIC register in 32-bit, so mustn't clobber it. */
__asm__ __volatile__ (
"xchg %%ebx, %1\n"
"cpuid\n"
"xchg %%ebx, %1\n":
"=a" (CPUInfo[0]),
"=r" (CPUInfo[1]),
"=c" (CPUInfo[2]),
"=d" (CPUInfo[3]) :
"0" (InfoType)
);
#else
__asm__ __volatile__ (
"cpuid":
"=a" (CPUInfo[0]),
"=b" (CPUInfo[1]),
"=c" (CPUInfo[2]),
"=d" (CPUInfo[3]) :
"0" (InfoType)
);
#endif
#elif defined(CPU_INFO_BY_C)
__get_cpuid(InfoType, &(CPUInfo[0]), &(CPUInfo[1]), &(CPUInfo[2]), &(CPUInfo[3]));
#endif
}
#endif
typedef struct CPU_Feature{
/* SIMD: 128-bit */
int HW_SSE;
int HW_SSE2;
int HW_SSE41;
/* SIMD: 256-bit */
int HW_AVX;
} CPU_Feature;
static void opus_cpu_feature_check(CPU_Feature *cpu_feature)
{
unsigned int info[4] = {0};
unsigned int nIds = 0;
cpuid(info, 0);
nIds = info[0];
if (nIds >= 1){
cpuid(info, 1);
cpu_feature->HW_SSE = (info[3] & (1 << 25)) != 0;
cpu_feature->HW_SSE2 = (info[3] & (1 << 26)) != 0;
cpu_feature->HW_SSE41 = (info[2] & (1 << 19)) != 0;
cpu_feature->HW_AVX = (info[2] & (1 << 28)) != 0;
}
else {
cpu_feature->HW_SSE = 0;
cpu_feature->HW_SSE2 = 0;
cpu_feature->HW_SSE41 = 0;
cpu_feature->HW_AVX = 0;
}
}
int opus_select_arch(void)
{
CPU_Feature cpu_feature;
int arch;
opus_cpu_feature_check(&cpu_feature);
arch = 0;
if (!cpu_feature.HW_SSE)
{
return arch;
}
arch++;
if (!cpu_feature.HW_SSE2)
{
return arch;
}
arch++;
if (!cpu_feature.HW_SSE41)
{
return arch;
}
arch++;
if (!cpu_feature.HW_AVX)
{
return arch;
}
arch++;
return arch;
}
#endif

View File

@ -0,0 +1,93 @@
/* Copyright (c) 2014, Cisco Systems, INC
Written by XiangMingZhu WeiZhou MinPeng YanWang
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if !defined(X86CPU_H)
# define X86CPU_H
# if defined(OPUS_X86_MAY_HAVE_SSE)
# define MAY_HAVE_SSE(name) name ## _sse
# else
# define MAY_HAVE_SSE(name) name ## _c
# endif
# if defined(OPUS_X86_MAY_HAVE_SSE2)
# define MAY_HAVE_SSE2(name) name ## _sse2
# else
# define MAY_HAVE_SSE2(name) name ## _c
# endif
# if defined(OPUS_X86_MAY_HAVE_SSE4_1)
# define MAY_HAVE_SSE4_1(name) name ## _sse4_1
# else
# define MAY_HAVE_SSE4_1(name) name ## _c
# endif
# if defined(OPUS_X86_MAY_HAVE_AVX)
# define MAY_HAVE_AVX(name) name ## _avx
# else
# define MAY_HAVE_AVX(name) name ## _c
# endif
# if defined(OPUS_HAVE_RTCD)
int opus_select_arch(void);
# endif
/*gcc appears to emit MOVDQA's to load the argument of an _mm_cvtepi8_epi32()
or _mm_cvtepi16_epi32() when optimizations are disabled, even though the
actual PMOVSXWD instruction takes an m32 or m64. Unlike a normal memory
reference, these require 16-byte alignment and load a full 16 bytes (instead
of 4 or 8), possibly reading out of bounds.
We can insert an explicit MOVD or MOVQ using _mm_cvtsi32_si128() or
_mm_loadl_epi64(), which should have the same semantics as an m32 or m64
reference in the PMOVSXWD instruction itself, but gcc is not smart enough to
optimize this out when optimizations ARE enabled.
Clang, in contrast, requires us to do this always for _mm_cvtepi8_epi32
(which is fair, since technically the compiler is always allowed to do the
dereference before invoking the function implementing the intrinsic).
However, it is smart enough to eliminate the extra MOVD instruction.
For _mm_cvtepi16_epi32, it does the right thing, though does *not* optimize out
the extra MOVQ if it's specified explicitly */
# if defined(__clang__) || !defined(__OPTIMIZE__)
# define OP_CVTEPI8_EPI32_M32(x) \
(_mm_cvtepi8_epi32(_mm_cvtsi32_si128(*(int *)(x))))
# else
# define OP_CVTEPI8_EPI32_M32(x) \
(_mm_cvtepi8_epi32(*(__m128i *)(x)))
#endif
# if !defined(__OPTIMIZE__)
# define OP_CVTEPI16_EPI32_M64(x) \
(_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x))))
# else
# define OP_CVTEPI16_EPI32_M64(x) \
(_mm_cvtepi16_epi32(*(__m128i *)(x)))
# endif
#endif

View File

@ -142,7 +142,7 @@ extern "C" {
*
* opus_encode() and opus_encode_float() return the number of bytes actually written to the packet.
* The return value <b>can be negative</b>, which indicates that an error has occurred. If the return value
* is 1 byte, then the packet does not need to be transmitted (DTX).
* is 2 bytes or less, then the packet does not need to be transmitted (DTX).
*
* Once the encoder state if no longer needed, it can be destroyed with
*
@ -616,7 +616,10 @@ OPUS_EXPORT void opus_pcm_soft_clip(float *pcm, int frame_size, int channels, fl
* merged. Splitting valid Opus packets is always guaranteed to succeed,
* whereas merging valid packets only succeeds if all frames have the same
* mode, bandwidth, and frame size, and when the total duration of the merged
* packet is no more than 120 ms.
* packet is no more than 120 ms. The 120 ms limit comes from the
* specification and limits decoder memory requirements at a point where
* framing overhead becomes negligible.
*
* The repacketizer currently only operates on elementary Opus
* streams. It will not manipualte multistream packets successfully, except in
* the degenerate case where they consist of data from a single stream.

View File

@ -46,7 +46,7 @@ extern "C" {
#define OPUS_OK 0
/** One or more invalid/out of range arguments @hideinitializer*/
#define OPUS_BAD_ARG -1
/** The mode struct passed is invalid @hideinitializer*/
/** Not enough bytes allocated in the buffer @hideinitializer*/
#define OPUS_BUFFER_TOO_SMALL -2
/** An internal error was detected @hideinitializer*/
#define OPUS_INTERNAL_ERROR -3
@ -65,7 +65,7 @@ extern "C" {
#ifndef OPUS_EXPORT
# if defined(WIN32)
# ifdef OPUS_BUILD
# if defined(OPUS_BUILD) && defined(DLL_EXPORT)
# define OPUS_EXPORT __declspec(dllexport)
# else
# define OPUS_EXPORT
@ -165,8 +165,9 @@ extern "C" {
#define OPUS_GET_EXPERT_FRAME_DURATION_REQUEST 4041
#define OPUS_SET_PREDICTION_DISABLED_REQUEST 4042
#define OPUS_GET_PREDICTION_DISABLED_REQUEST 4043
/* Don't use 4045, it's already taken by OPUS_GET_GAIN_REQUEST */
#define OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST 4046
#define OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST 4047
/* Macros to trigger compilation errors when the wrong types are provided to a CTL */
#define __opus_check_int(x) (((void)((x) == (opus_int32)0)), (opus_int32)(x))
@ -208,6 +209,9 @@ extern "C" {
#define OPUS_FRAMESIZE_20_MS 5004 /**< Use 20 ms frames */
#define OPUS_FRAMESIZE_40_MS 5005 /**< Use 40 ms frames */
#define OPUS_FRAMESIZE_60_MS 5006 /**< Use 60 ms frames */
#define OPUS_FRAMESIZE_80_MS 5007 /**< Use 80 ms frames */
#define OPUS_FRAMESIZE_100_MS 5008 /**< Use 100 ms frames */
#define OPUS_FRAMESIZE_120_MS 5009 /**< Use 120 ms frames */
/**@}*/
@ -274,7 +278,6 @@ extern "C" {
/** Enables or disables variable bitrate (VBR) in the encoder.
* The configured bitrate may not be met exactly because frames must
* be an integer number of bytes in length.
* @warning Only the MDCT mode of Opus can provide hard CBR behavior.
* @see OPUS_GET_VBR
* @see OPUS_SET_VBR_CONSTRAINT
* @param[in] x <tt>opus_int32</tt>: Allowed values:
@ -454,14 +457,6 @@ extern "C" {
* @hideinitializer */
#define OPUS_GET_APPLICATION(x) OPUS_GET_APPLICATION_REQUEST, __opus_check_int_ptr(x)
/** Gets the sampling rate the encoder or decoder was initialized with.
* This simply returns the <code>Fs</code> value passed to opus_encoder_init()
* or opus_decoder_init().
* @param[out] x <tt>opus_int32 *</tt>: Sampling rate of encoder or decoder.
* @hideinitializer
*/
#define OPUS_GET_SAMPLE_RATE(x) OPUS_GET_SAMPLE_RATE_REQUEST, __opus_check_int_ptr(x)
/** Gets the total samples of delay added by the entire codec.
* This can be queried by the encoder and then the provided number of samples can be
* skipped on from the start of the decoder's output to provide time aligned input
@ -498,9 +493,9 @@ extern "C" {
#define OPUS_GET_INBAND_FEC(x) OPUS_GET_INBAND_FEC_REQUEST, __opus_check_int_ptr(x)
/** Configures the encoder's expected packet loss percentage.
* Higher values with trigger progressively more loss resistant behavior in the encoder
* at the expense of quality at a given bitrate in the lossless case, but greater quality
* under loss.
* Higher values trigger progressively more loss resistant behavior in the encoder
* at the expense of quality at a given bitrate in the absence of packet loss, but
* greater quality under loss.
* @see OPUS_GET_PACKET_LOSS_PERC
* @param[in] x <tt>opus_int32</tt>: Loss percentage in the range 0-100, inclusive (default: 0).
* @hideinitializer */
@ -532,7 +527,19 @@ extern "C" {
* @hideinitializer */
#define OPUS_GET_DTX(x) OPUS_GET_DTX_REQUEST, __opus_check_int_ptr(x)
/** Configures the depth of signal being encoded.
*
* This is a hint which helps the encoder identify silence and near-silence.
* It represents the number of significant bits of linear intensity below
* which the signal contains ignorable quantization or other noise.
*
* For example, OPUS_SET_LSB_DEPTH(14) would be an appropriate setting
* for G.711 u-law input. OPUS_SET_LSB_DEPTH(16) would be appropriate
* for 16-bit linear pcm input with opus_encode_float().
*
* When using opus_encode() instead of opus_encode_float(), or when libopus
* is compiled for fixed-point, the encoder uses the minimum of the value
* set here and the value 16.
*
* @see OPUS_GET_LSB_DEPTH
* @param[in] x <tt>opus_int32</tt>: Input precision in bits, between 8 and 24
* (default: 24).
@ -545,11 +552,6 @@ extern "C" {
* @hideinitializer */
#define OPUS_GET_LSB_DEPTH(x) OPUS_GET_LSB_DEPTH_REQUEST, __opus_check_int_ptr(x)
/** Gets the duration (in samples) of the last packet successfully decoded or concealed.
* @param[out] x <tt>opus_int32 *</tt>: Number of samples (at current sampling rate).
* @hideinitializer */
#define OPUS_GET_LAST_PACKET_DURATION(x) OPUS_GET_LAST_PACKET_DURATION_REQUEST, __opus_check_int_ptr(x)
/** Configures the encoder's use of variable duration frames.
* When variable duration is enabled, the encoder is free to use a shorter frame
* size than the one requested in the opus_encode*() call.
@ -558,41 +560,59 @@ extern "C" {
* packet. The part of the audio that was not encoded needs to be resent to the
* encoder for the next call. Do not use this option unless you <b>really</b>
* know what you are doing.
* @see OPUS_GET_EXPERT_VARIABLE_DURATION
* @see OPUS_GET_EXPERT_FRAME_DURATION
* @param[in] x <tt>opus_int32</tt>: Allowed values:
* <dl>
* <dt>OPUS_FRAMESIZE_ARG</dt><dd>Select frame size from the argument (default).</dd>
* <dt>OPUS_FRAMESIZE_2_5_MS</dt><dd>Use 2.5 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_5_MS</dt><dd>Use 2.5 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_5_MS</dt><dd>Use 5 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_10_MS</dt><dd>Use 10 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_20_MS</dt><dd>Use 20 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_40_MS</dt><dd>Use 40 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_60_MS</dt><dd>Use 60 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_80_MS</dt><dd>Use 80 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_100_MS</dt><dd>Use 100 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_120_MS</dt><dd>Use 120 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>Optimize the frame size dynamically.</dd>
* </dl>
* @hideinitializer */
#define OPUS_SET_EXPERT_FRAME_DURATION(x) OPUS_SET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int(x)
/** Gets the encoder's configured use of variable duration frames.
* @see OPUS_SET_EXPERT_VARIABLE_DURATION
* @see OPUS_SET_EXPERT_FRAME_DURATION
* @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
* <dl>
* <dt>OPUS_FRAMESIZE_ARG</dt><dd>Select frame size from the argument (default).</dd>
* <dt>OPUS_FRAMESIZE_2_5_MS</dt><dd>Use 2.5 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_5_MS</dt><dd>Use 2.5 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_5_MS</dt><dd>Use 5 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_10_MS</dt><dd>Use 10 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_20_MS</dt><dd>Use 20 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_40_MS</dt><dd>Use 40 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_60_MS</dt><dd>Use 60 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_80_MS</dt><dd>Use 80 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_100_MS</dt><dd>Use 100 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_120_MS</dt><dd>Use 120 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>Optimize the frame size dynamically.</dd>
* </dl>
* @hideinitializer */
#define OPUS_GET_EXPERT_FRAME_DURATION(x) OPUS_GET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int_ptr(x)
/** If set to 1, disables almost all use of prediction, making frames almost
completely independent. This reduces quality. (default : 0)
* completely independent. This reduces quality.
* @see OPUS_GET_PREDICTION_DISABLED
* @param[in] x <tt>opus_int32</tt>: Allowed values:
* <dl>
* <dt>0</dt><dd>Enable prediction (default).</dd>
* <dt>1</dt><dd>Disable prediction.</dd>
* </dl>
* @hideinitializer */
#define OPUS_SET_PREDICTION_DISABLED(x) OPUS_SET_PREDICTION_DISABLED_REQUEST, __opus_check_int(x)
/** Gets the encoder's configured prediction status.
* @see OPUS_SET_PREDICTION_DISABLED
* @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
* <dl>
* <dt>0</dt><dd>Prediction enabled (default).</dd>
* <dt>1</dt><dd>Prediction disabled.</dd>
* </dl>
* @hideinitializer */
#define OPUS_GET_PREDICTION_DISABLED(x) OPUS_GET_PREDICTION_DISABLED_REQUEST, __opus_check_int_ptr(x)
@ -649,18 +669,6 @@ extern "C" {
* @hideinitializer */
#define OPUS_GET_FINAL_RANGE(x) OPUS_GET_FINAL_RANGE_REQUEST, __opus_check_uint_ptr(x)
/** Gets the pitch of the last decoded frame, if available.
* This can be used for any post-processing algorithm requiring the use of pitch,
* e.g. time stretching/shortening. If the last frame was not voiced, or if the
* pitch was not coded in the frame, then zero is returned.
*
* This CTL is only implemented for decoder instances.
*
* @param[out] x <tt>opus_int32 *</tt>: pitch period at 48 kHz (or 0 if not available)
*
* @hideinitializer */
#define OPUS_GET_PITCH(x) OPUS_GET_PITCH_REQUEST, __opus_check_int_ptr(x)
/** Gets the encoder's configured bandpass or the decoder's last bandpass.
* @see OPUS_SET_BANDWIDTH
* @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
@ -675,6 +683,38 @@ extern "C" {
* @hideinitializer */
#define OPUS_GET_BANDWIDTH(x) OPUS_GET_BANDWIDTH_REQUEST, __opus_check_int_ptr(x)
/** Gets the sampling rate the encoder or decoder was initialized with.
* This simply returns the <code>Fs</code> value passed to opus_encoder_init()
* or opus_decoder_init().
* @param[out] x <tt>opus_int32 *</tt>: Sampling rate of encoder or decoder.
* @hideinitializer
*/
#define OPUS_GET_SAMPLE_RATE(x) OPUS_GET_SAMPLE_RATE_REQUEST, __opus_check_int_ptr(x)
/** If set to 1, disables the use of phase inversion for intensity stereo,
* improving the quality of mono downmixes, but slightly reducing normal
* stereo quality. Disabling phase inversion in the decoder does not comply
* with RFC 6716, although it does not cause any interoperability issue and
* is expected to become part of the Opus standard once RFC 6716 is updated
* by draft-ietf-codec-opus-update.
* @see OPUS_GET_PHASE_INVERSION_DISABLED
* @param[in] x <tt>opus_int32</tt>: Allowed values:
* <dl>
* <dt>0</dt><dd>Enable phase inversion (default).</dd>
* <dt>1</dt><dd>Disable phase inversion.</dd>
* </dl>
* @hideinitializer */
#define OPUS_SET_PHASE_INVERSION_DISABLED(x) OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST, __opus_check_int(x)
/** Gets the encoder's configured phase inversion status.
* @see OPUS_SET_PHASE_INVERSION_DISABLED
* @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
* <dl>
* <dt>0</dt><dd>Stereo phase inversion enabled (default).</dd>
* <dt>1</dt><dd>Stereo phase inversion disabled.</dd>
* </dl>
* @hideinitializer */
#define OPUS_GET_PHASE_INVERSION_DISABLED(x) OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST, __opus_check_int_ptr(x)
/**@}*/
/** @defgroup opus_decoderctls Decoder related CTLs
@ -699,6 +739,23 @@ extern "C" {
* @hideinitializer */
#define OPUS_GET_GAIN(x) OPUS_GET_GAIN_REQUEST, __opus_check_int_ptr(x)
/** Gets the duration (in samples) of the last packet successfully decoded or concealed.
* @param[out] x <tt>opus_int32 *</tt>: Number of samples (at current sampling rate).
* @hideinitializer */
#define OPUS_GET_LAST_PACKET_DURATION(x) OPUS_GET_LAST_PACKET_DURATION_REQUEST, __opus_check_int_ptr(x)
/** Gets the pitch of the last decoded frame, if available.
* This can be used for any post-processing algorithm requiring the use of pitch,
* e.g. time stretching/shortening. If the last frame was not voiced, or if the
* pitch was not coded in the frame, then zero is returned.
*
* This CTL is only implemented for decoder instances.
*
* @param[out] x <tt>opus_int32 *</tt>: pitch period at 48 kHz (or 0 if not available)
*
* @hideinitializer */
#define OPUS_GET_PITCH(x) OPUS_GET_PITCH_REQUEST, __opus_check_int_ptr(x)
/**@}*/
/** @defgroup opus_libinfo Opus library information functions
@ -713,6 +770,10 @@ extern "C" {
OPUS_EXPORT const char *opus_strerror(int error);
/** Gets the libopus version string.
*
* Applications may look for the substring "-fixed" in the version string to
* determine whether they have a fixed-point or floating-point build at
* runtime.
*
* @returns Version string
*/

View File

@ -110,10 +110,10 @@ extern "C" {
* packets produced by the encoder. Some basic information, such as packet
* duration, can be computed without any special negotiation.
*
* The format for multistream Opus packets is defined in the
* <a href="http://tools.ietf.org/html/draft-terriberry-oggopus">Ogg
* encapsulation specification</a> and is based on the self-delimited Opus
* framing described in Appendix B of <a href="http://tools.ietf.org/html/rfc6716">RFC 6716</a>.
* The format for multistream Opus packets is defined in
* <a href="https://tools.ietf.org/html/rfc7845">RFC 7845</a>
* and is based on the self-delimited Opus framing described in Appendix B of
* <a href="https://tools.ietf.org/html/rfc6716">RFC 6716</a>.
* Normal Opus packets are just a degenerate case of multistream Opus packets,
* and can be encoded or decoded with the multistream API by setting
* <code>streams</code> to <code>1</code> when initializing the encoder or
@ -140,7 +140,7 @@ extern "C" {
*
* The output channels specified by the encoder
* should use the
* <a href="http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-800004.3.9">Vorbis
* <a href="https://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-810004.3.9">Vorbis
* channel ordering</a>. A decoder may wish to apply an additional permutation
* to the mapping the encoder used to achieve a different output channel
* order (e.g. for outputing in WAV order).

View File

@ -40,7 +40,7 @@ POSSIBILITY OF SUCH DAMAGE.
/* Number of binary divisions, when not in low complexity mode */
#define BIN_DIV_STEPS_A2NLSF_FIX 3 /* must be no higher than 16 - log2( LSF_COS_TAB_SZ_FIX ) */
#define MAX_ITERATIONS_A2NLSF_FIX 30
#define MAX_ITERATIONS_A2NLSF_FIX 16
/* Helper function for A2NLSF(..) */
/* Transforms polynomials from cos(n*f) to cos(f)^n */
@ -71,8 +71,23 @@ static OPUS_INLINE opus_int32 silk_A2NLSF_eval_poly( /* return the polynomial ev
y32 = p[ dd ]; /* Q16 */
x_Q16 = silk_LSHIFT( x, 4 );
for( n = dd - 1; n >= 0; n-- ) {
y32 = silk_SMLAWW( p[ n ], y32, x_Q16 ); /* Q16 */
if ( opus_likely( 8 == dd ) )
{
y32 = silk_SMLAWW( p[ 7 ], y32, x_Q16 );
y32 = silk_SMLAWW( p[ 6 ], y32, x_Q16 );
y32 = silk_SMLAWW( p[ 5 ], y32, x_Q16 );
y32 = silk_SMLAWW( p[ 4 ], y32, x_Q16 );
y32 = silk_SMLAWW( p[ 3 ], y32, x_Q16 );
y32 = silk_SMLAWW( p[ 2 ], y32, x_Q16 );
y32 = silk_SMLAWW( p[ 1 ], y32, x_Q16 );
y32 = silk_SMLAWW( p[ 0 ], y32, x_Q16 );
}
else
{
for( n = dd - 1; n >= 0; n-- ) {
y32 = silk_SMLAWW( p[ n ], y32, x_Q16 ); /* Q16 */
}
}
return y32;
}
@ -115,7 +130,7 @@ void silk_A2NLSF(
const opus_int d /* I Filter order (must be even) */
)
{
opus_int i, k, m, dd, root_ix, ffrac;
opus_int i, k, m, dd, root_ix, ffrac;
opus_int32 xlo, xhi, xmid;
opus_int32 ylo, yhi, ymid, thr;
opus_int32 nom, den;
@ -224,13 +239,13 @@ void silk_A2NLSF(
/* Set NLSFs to white spectrum and exit */
NLSF[ 0 ] = (opus_int16)silk_DIV32_16( 1 << 15, d + 1 );
for( k = 1; k < d; k++ ) {
NLSF[ k ] = (opus_int16)silk_SMULBB( k + 1, NLSF[ 0 ] );
NLSF[ k ] = (opus_int16)silk_ADD16( NLSF[ k-1 ], NLSF[ 0 ] );
}
return;
}
/* Error: Apply progressively more bandwidth expansion and run again */
silk_bwexpander_32( a_Q16, d, 65536 - silk_SMULBB( 10 + i, i ) ); /* 10_Q16 = 0.00015*/
silk_bwexpander_32( a_Q16, d, 65536 - silk_LSHIFT( 1, i ) );
silk_A2NLSF_init( a_Q16, P, Q, dd );
p = P; /* Pointer to polynomial */

View File

@ -111,7 +111,8 @@ opus_int silk_Decode( /* O Returns error co
opus_int newPacketFlag, /* I Indicates first decoder call for this packet */
ec_dec *psRangeDec, /* I/O Compressor data structure */
opus_int16 *samplesOut, /* O Decoded output speech vector */
opus_int32 *nSamplesOut /* O Number of samples decoded */
opus_int32 *nSamplesOut, /* O Number of samples decoded */
int arch /* I Run-time architecture */
);
#if 0

View File

@ -34,9 +34,8 @@ POSSIBILITY OF SUCH DAMAGE.
/* Generates excitation for CNG LPC synthesis */
static OPUS_INLINE void silk_CNG_exc(
opus_int32 residual_Q10[], /* O CNG residual signal Q10 */
opus_int32 exc_Q14[], /* O CNG excitation signal Q10 */
opus_int32 exc_buf_Q14[], /* I Random samples buffer Q10 */
opus_int32 Gain_Q16, /* I Gain to apply */
opus_int length, /* I Length */
opus_int32 *rand_seed /* I/O Seed to random index generator */
)
@ -55,7 +54,7 @@ static OPUS_INLINE void silk_CNG_exc(
idx = (opus_int)( silk_RSHIFT( seed, 24 ) & exc_mask );
silk_assert( idx >= 0 );
silk_assert( idx <= CNG_BUF_MASK_MAX );
residual_Q10[ i ] = (opus_int16)silk_SAT16( silk_SMULWW( exc_buf_Q14[ idx ], Gain_Q16 >> 4 ) );
exc_Q14[ i ] = exc_buf_Q14[ idx ];
}
*rand_seed = seed;
}
@ -85,7 +84,7 @@ void silk_CNG(
)
{
opus_int i, subfr;
opus_int32 sum_Q6, max_Gain_Q16;
opus_int32 LPC_pred_Q10, max_Gain_Q16, gain_Q16, gain_Q10;
opus_int16 A_Q12[ MAX_LPC_ORDER ];
silk_CNG_struct *psCNG = &psDec->sCNG;
SAVE_STACK;
@ -124,47 +123,60 @@ void silk_CNG(
/* Add CNG when packet is lost or during DTX */
if( psDec->lossCnt ) {
VARDECL( opus_int32, CNG_sig_Q10 );
ALLOC( CNG_sig_Q10, length + MAX_LPC_ORDER, opus_int32 );
VARDECL( opus_int32, CNG_sig_Q14 );
ALLOC( CNG_sig_Q14, length + MAX_LPC_ORDER, opus_int32 );
/* Generate CNG excitation */
silk_CNG_exc( CNG_sig_Q10 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, psCNG->CNG_smth_Gain_Q16, length, &psCNG->rand_seed );
gain_Q16 = silk_SMULWW( psDec->sPLC.randScale_Q14, psDec->sPLC.prevGain_Q16[1] );
if( gain_Q16 >= (1 << 21) || psCNG->CNG_smth_Gain_Q16 > (1 << 23) ) {
gain_Q16 = silk_SMULTT( gain_Q16, gain_Q16 );
gain_Q16 = silk_SUB_LSHIFT32(silk_SMULTT( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 );
gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 16 );
} else {
gain_Q16 = silk_SMULWW( gain_Q16, gain_Q16 );
gain_Q16 = silk_SUB_LSHIFT32(silk_SMULWW( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 );
gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 8 );
}
gain_Q10 = silk_RSHIFT( gain_Q16, 6 );
silk_CNG_exc( CNG_sig_Q14 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, length, &psCNG->rand_seed );
/* Convert CNG NLSF to filter representation */
silk_NLSF2A( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order );
/* Generate CNG signal, by synthesis filtering */
silk_memcpy( CNG_sig_Q10, psCNG->CNG_synth_state, MAX_LPC_ORDER * sizeof( opus_int32 ) );
silk_memcpy( CNG_sig_Q14, psCNG->CNG_synth_state, MAX_LPC_ORDER * sizeof( opus_int32 ) );
for( i = 0; i < length; i++ ) {
silk_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 );
/* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
sum_Q6 = silk_RSHIFT( psDec->LPC_order, 1 );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 1 ], A_Q12[ 0 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 2 ], A_Q12[ 1 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 3 ], A_Q12[ 2 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 4 ], A_Q12[ 3 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 5 ], A_Q12[ 4 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 6 ], A_Q12[ 5 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 7 ], A_Q12[ 6 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 8 ], A_Q12[ 7 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 9 ], A_Q12[ 8 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] );
LPC_pred_Q10 = silk_RSHIFT( psDec->LPC_order, 1 );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 1 ], A_Q12[ 0 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 2 ], A_Q12[ 1 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 3 ], A_Q12[ 2 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 4 ], A_Q12[ 3 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 5 ], A_Q12[ 4 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 6 ], A_Q12[ 5 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 7 ], A_Q12[ 6 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 8 ], A_Q12[ 7 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 9 ], A_Q12[ 8 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] );
if( psDec->LPC_order == 16 ) {
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 11 ], A_Q12[ 10 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 12 ], A_Q12[ 11 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 13 ], A_Q12[ 12 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 14 ], A_Q12[ 13 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 15 ], A_Q12[ 14 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 16 ], A_Q12[ 15 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 11 ], A_Q12[ 10 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 12 ], A_Q12[ 11 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 13 ], A_Q12[ 12 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 14 ], A_Q12[ 13 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 15 ], A_Q12[ 14 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 16 ], A_Q12[ 15 ] );
}
/* Update states */
CNG_sig_Q10[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q10[ MAX_LPC_ORDER + i ], sum_Q6, 4 );
CNG_sig_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_SAT32( CNG_sig_Q14[ MAX_LPC_ORDER + i ], silk_LSHIFT_SAT32( LPC_pred_Q10, 4 ) );
/* Scale with Gain and add to input signal */
frame[ i ] = (opus_int16)silk_ADD_SAT16( frame[ i ], silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( CNG_sig_Q14[ MAX_LPC_ORDER + i ], gain_Q10 ), 8 ) ) );
frame[ i ] = silk_ADD_SAT16( frame[ i ], silk_RSHIFT_ROUND( sum_Q6, 6 ) );
}
silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q10[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q14[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
} else {
silk_memset( psCNG->CNG_synth_state, 0, psDec->LPC_order * sizeof( opus_int32 ) );
}

View File

@ -39,16 +39,24 @@ POSSIBILITY OF SUCH DAMAGE.
/* first d output samples are set to zero */
/*******************************************/
/* OPT: Using celt_fir() for this function should be faster, but it may cause
integer overflows in intermediate values (not final results), which the
current implementation silences by casting to unsigned. Enabling
this should be safe in pretty much all cases, even though it is not technically
C89-compliant. */
#define USE_CELT_FIR 0
void silk_LPC_analysis_filter(
opus_int16 *out, /* O Output signal */
const opus_int16 *in, /* I Input signal */
const opus_int16 *B, /* I MA prediction coefficients, Q12 [order] */
const opus_int32 len, /* I Signal length */
const opus_int32 d /* I Filter order */
const opus_int32 d, /* I Filter order */
int arch /* I Run-time architecture */
)
{
opus_int j;
#ifdef FIXED_POINT
#if USE_CELT_FIR
opus_int16 mem[SILK_MAX_ORDER_LPC];
opus_int16 num[SILK_MAX_ORDER_LPC];
#else
@ -61,7 +69,7 @@ void silk_LPC_analysis_filter(
silk_assert( (d & 1) == 0 );
silk_assert( d <= len );
#ifdef FIXED_POINT
#if USE_CELT_FIR
silk_assert( d <= SILK_MAX_ORDER_LPC );
for ( j = 0; j < d; j++ ) {
num[ j ] = -B[ j ];
@ -69,11 +77,12 @@ void silk_LPC_analysis_filter(
for (j=0;j<d;j++) {
mem[ j ] = in[ d - j - 1 ];
}
celt_fir( in + d, num, out + d, len - d, d, mem );
celt_fir( in + d, num, out + d, len - d, d, mem, arch );
for ( j = 0; j < d; j++ ) {
out[ j ] = 0;
}
#else
(void)arch;
for( ix = d; ix < len; ix++ ) {
in_ptr = &in[ ix - 1 ];

View File

@ -0,0 +1,81 @@
/***********************************************************************
Copyright (c) 2013, Koen Vos. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "SigProc_FIX.h"
/* Convert int32 coefficients to int16 coefs and make sure there's no wrap-around */
void silk_LPC_fit(
opus_int16 *a_QOUT, /* O Output signal */
opus_int32 *a_QIN, /* I/O Input signal */
const opus_int QOUT, /* I Input Q domain */
const opus_int QIN, /* I Input Q domain */
const opus_int d /* I Filter order */
)
{
opus_int i, k, idx = 0;
opus_int32 maxabs, absval, chirp_Q16;
/* Limit the maximum absolute value of the prediction coefficients, so that they'll fit in int16 */
for( i = 0; i < 10; i++ ) {
/* Find maximum absolute value and its index */
maxabs = 0;
for( k = 0; k < d; k++ ) {
absval = silk_abs( a_QIN[k] );
if( absval > maxabs ) {
maxabs = absval;
idx = k;
}
}
maxabs = silk_RSHIFT_ROUND( maxabs, QIN - QOUT );
if( maxabs > silk_int16_MAX ) {
/* Reduce magnitude of prediction coefficients */
maxabs = silk_min( maxabs, 163838 ); /* ( silk_int32_MAX >> 14 ) + silk_int16_MAX = 163838 */
chirp_Q16 = SILK_FIX_CONST( 0.999, 16 ) - silk_DIV32( silk_LSHIFT( maxabs - silk_int16_MAX, 14 ),
silk_RSHIFT32( silk_MUL( maxabs, idx + 1), 2 ) );
silk_bwexpander_32( a_QIN, d, chirp_Q16 );
} else {
break;
}
}
if( i == 10 ) {
/* Reached the last iteration, clip the coefficients */
for( k = 0; k < d; k++ ) {
a_QOUT[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( a_QIN[ k ], QIN - QOUT ) );
a_QIN[ k ] = silk_LSHIFT( (opus_int32)a_QOUT[ k ], QIN - QOUT );
}
} else {
for( k = 0; k < d; k++ ) {
a_QOUT[ k ] = (opus_int16)silk_RSHIFT_ROUND( a_QIN[ k ], QIN - QOUT );
}
}
}

View File

@ -30,6 +30,7 @@ POSSIBILITY OF SUCH DAMAGE.
#endif
#include "SigProc_FIX.h"
#include "define.h"
#define QA 24
#define A_LIMIT SILK_FIX_CONST( 0.99975, QA )
@ -39,68 +40,80 @@ POSSIBILITY OF SUCH DAMAGE.
/* Compute inverse of LPC prediction gain, and */
/* test if LPC coefficients are stable (all poles within unit circle) */
static opus_int32 LPC_inverse_pred_gain_QA( /* O Returns inverse prediction gain in energy domain, Q30 */
opus_int32 A_QA[ 2 ][ SILK_MAX_ORDER_LPC ], /* I Prediction coefficients */
opus_int32 A_QA[ SILK_MAX_ORDER_LPC ], /* I Prediction coefficients */
const opus_int order /* I Prediction order */
)
{
opus_int k, n, mult2Q;
opus_int32 invGain_Q30, rc_Q31, rc_mult1_Q30, rc_mult2, tmp_QA;
opus_int32 *Aold_QA, *Anew_QA;
opus_int32 invGain_Q30, rc_Q31, rc_mult1_Q30, rc_mult2, tmp1, tmp2;
Anew_QA = A_QA[ order & 1 ];
invGain_Q30 = (opus_int32)1 << 30;
invGain_Q30 = SILK_FIX_CONST( 1, 30 );
for( k = order - 1; k > 0; k-- ) {
/* Check for stability */
if( ( Anew_QA[ k ] > A_LIMIT ) || ( Anew_QA[ k ] < -A_LIMIT ) ) {
if( ( A_QA[ k ] > A_LIMIT ) || ( A_QA[ k ] < -A_LIMIT ) ) {
return 0;
}
/* Set RC equal to negated AR coef */
rc_Q31 = -silk_LSHIFT( Anew_QA[ k ], 31 - QA );
rc_Q31 = -silk_LSHIFT( A_QA[ k ], 31 - QA );
/* rc_mult1_Q30 range: [ 1 : 2^30 ] */
rc_mult1_Q30 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
rc_mult1_Q30 = silk_SUB32( SILK_FIX_CONST( 1, 30 ), silk_SMMUL( rc_Q31, rc_Q31 ) );
silk_assert( rc_mult1_Q30 > ( 1 << 15 ) ); /* reduce A_LIMIT if fails */
silk_assert( rc_mult1_Q30 <= ( 1 << 30 ) );
/* rc_mult2 range: [ 2^30 : silk_int32_MAX ] */
mult2Q = 32 - silk_CLZ32( silk_abs( rc_mult1_Q30 ) );
rc_mult2 = silk_INVERSE32_varQ( rc_mult1_Q30, mult2Q + 30 );
/* Update inverse gain */
/* invGain_Q30 range: [ 0 : 2^30 ] */
invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 );
silk_assert( invGain_Q30 >= 0 );
silk_assert( invGain_Q30 <= ( 1 << 30 ) );
if( invGain_Q30 < SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN, 30 ) ) {
return 0;
}
/* Swap pointers */
Aold_QA = Anew_QA;
Anew_QA = A_QA[ k & 1 ];
/* rc_mult2 range: [ 2^30 : silk_int32_MAX ] */
mult2Q = 32 - silk_CLZ32( silk_abs( rc_mult1_Q30 ) );
rc_mult2 = silk_INVERSE32_varQ( rc_mult1_Q30, mult2Q + 30 );
/* Update AR coefficient */
for( n = 0; n < k; n++ ) {
tmp_QA = Aold_QA[ n ] - MUL32_FRAC_Q( Aold_QA[ k - n - 1 ], rc_Q31, 31 );
Anew_QA[ n ] = MUL32_FRAC_Q( tmp_QA, rc_mult2 , mult2Q );
for( n = 0; n < (k + 1) >> 1; n++ ) {
opus_int64 tmp64;
tmp1 = A_QA[ n ];
tmp2 = A_QA[ k - n - 1 ];
tmp64 = silk_RSHIFT_ROUND64( silk_SMULL( silk_SUB_SAT32(tmp1,
MUL32_FRAC_Q( tmp2, rc_Q31, 31 ) ), rc_mult2 ), mult2Q);
if( tmp64 > silk_int32_MAX || tmp64 < silk_int32_MIN ) {
return 0;
}
A_QA[ n ] = ( opus_int32 )tmp64;
tmp64 = silk_RSHIFT_ROUND64( silk_SMULL( silk_SUB_SAT32(tmp2,
MUL32_FRAC_Q( tmp1, rc_Q31, 31 ) ), rc_mult2), mult2Q);
if( tmp64 > silk_int32_MAX || tmp64 < silk_int32_MIN ) {
return 0;
}
A_QA[ k - n - 1 ] = ( opus_int32 )tmp64;
}
}
/* Check for stability */
if( ( Anew_QA[ 0 ] > A_LIMIT ) || ( Anew_QA[ 0 ] < -A_LIMIT ) ) {
if( ( A_QA[ k ] > A_LIMIT ) || ( A_QA[ k ] < -A_LIMIT ) ) {
return 0;
}
/* Set RC equal to negated AR coef */
rc_Q31 = -silk_LSHIFT( Anew_QA[ 0 ], 31 - QA );
rc_Q31 = -silk_LSHIFT( A_QA[ 0 ], 31 - QA );
/* Range: [ 1 : 2^30 ] */
rc_mult1_Q30 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
rc_mult1_Q30 = silk_SUB32( SILK_FIX_CONST( 1, 30 ), silk_SMMUL( rc_Q31, rc_Q31 ) );
/* Update inverse gain */
/* Range: [ 0 : 2^30 ] */
invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 );
silk_assert( invGain_Q30 >= 0 );
silk_assert( invGain_Q30 <= 1<<30 );
silk_assert( invGain_Q30 >= 0 );
silk_assert( invGain_Q30 <= ( 1 << 30 ) );
if( invGain_Q30 < SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN, 30 ) ) {
return 0;
}
return invGain_Q30;
}
@ -112,16 +125,13 @@ opus_int32 silk_LPC_inverse_pred_gain( /* O Returns inverse predi
)
{
opus_int k;
opus_int32 Atmp_QA[ 2 ][ SILK_MAX_ORDER_LPC ];
opus_int32 *Anew_QA;
opus_int32 Atmp_QA[ SILK_MAX_ORDER_LPC ];
opus_int32 DC_resp = 0;
Anew_QA = Atmp_QA[ order & 1 ];
/* Increase Q domain of the AR coefficients */
for( k = 0; k < order; k++ ) {
DC_resp += (opus_int32)A_Q12[ k ];
Anew_QA[ k ] = silk_LSHIFT32( (opus_int32)A_Q12[ k ], QA - 12 );
Atmp_QA[ k ] = silk_LSHIFT32( (opus_int32)A_Q12[ k ], QA - 12 );
}
/* If the DC is unstable, we don't even need to do the full calculations */
if( DC_resp >= 4096 ) {
@ -139,14 +149,11 @@ opus_int32 silk_LPC_inverse_pred_gain_Q24( /* O Returns inverse pred
)
{
opus_int k;
opus_int32 Atmp_QA[ 2 ][ SILK_MAX_ORDER_LPC ];
opus_int32 *Anew_QA;
Anew_QA = Atmp_QA[ order & 1 ];
opus_int32 Atmp_QA[ SILK_MAX_ORDER_LPC ];
/* Increase Q domain of the AR coefficients */
for( k = 0; k < order; k++ ) {
Anew_QA[ k ] = silk_RSHIFT32( A_Q24[ k ], 24 - QA );
Atmp_QA[ k ] = silk_RSHIFT32( A_Q24[ k ], 24 - QA );
}
return LPC_inverse_pred_gain_QA( Atmp_QA, order );

View File

@ -319,14 +319,6 @@ static OPUS_INLINE opus_int32 silk_ADD_POS_SAT32(opus_int64 a, opus_int64 b){
return(tmp);
}
#undef silk_ADD_POS_SAT64
static OPUS_INLINE opus_int64 silk_ADD_POS_SAT64(opus_int64 a, opus_int64 b){
opus_int64 tmp;
ops_count += 1;
tmp = ((((a)+(b)) & 0x8000000000000000LL) ? silk_int64_MAX : ((a)+(b)));
return(tmp);
}
#undef silk_LSHIFT8
static OPUS_INLINE opus_int8 silk_LSHIFT8(opus_int8 a, opus_int32 shift){
opus_int8 ret;

View File

@ -539,8 +539,7 @@ static OPUS_INLINE opus_int32 silk_DIV32_16_(opus_int32 a32, opus_int32 b32, cha
no checking needed for silk_POS_SAT32
no checking needed for silk_ADD_POS_SAT8
no checking needed for silk_ADD_POS_SAT16
no checking needed for silk_ADD_POS_SAT32
no checking needed for silk_ADD_POS_SAT64 */
no checking needed for silk_ADD_POS_SAT32 */
#undef silk_LSHIFT8
#define silk_LSHIFT8(a,b) silk_LSHIFT8_((a), (b), __FILE__, __LINE__)

View File

@ -83,15 +83,14 @@ void silk_NLSF2A(
opus_int32 P[ SILK_MAX_ORDER_LPC / 2 + 1 ], Q[ SILK_MAX_ORDER_LPC / 2 + 1 ];
opus_int32 Ptmp, Qtmp, f_int, f_frac, cos_val, delta;
opus_int32 a32_QA1[ SILK_MAX_ORDER_LPC ];
opus_int32 maxabs, absval, idx=0, sc_Q16;
silk_assert( LSF_COS_TAB_SZ_FIX == 128 );
silk_assert( d==10||d==16 );
silk_assert( d==10 || d==16 );
/* convert LSFs to 2*cos(LSF), using piecewise linear curve from table */
ordering = d == 16 ? ordering16 : ordering10;
for( k = 0; k < d; k++ ) {
silk_assert(NLSF[k] >= 0 );
silk_assert( NLSF[k] >= 0 );
/* f_int on a scale 0-127 (rounded down) */
f_int = silk_RSHIFT( NLSF[k], 15 - 7 );
@ -126,52 +125,15 @@ void silk_NLSF2A(
a32_QA1[ d-k-1 ] = Qtmp - Ptmp; /* QA+1 */
}
/* Limit the maximum absolute value of the prediction coefficients, so that they'll fit in int16 */
for( i = 0; i < 10; i++ ) {
/* Find maximum absolute value and its index */
maxabs = 0;
for( k = 0; k < d; k++ ) {
absval = silk_abs( a32_QA1[k] );
if( absval > maxabs ) {
maxabs = absval;
idx = k;
}
}
maxabs = silk_RSHIFT_ROUND( maxabs, QA + 1 - 12 ); /* QA+1 -> Q12 */
/* Convert int32 coefficients to Q12 int16 coefs */
silk_LPC_fit( a_Q12, a32_QA1, 12, QA + 1, d );
if( maxabs > silk_int16_MAX ) {
/* Reduce magnitude of prediction coefficients */
maxabs = silk_min( maxabs, 163838 ); /* ( silk_int32_MAX >> 14 ) + silk_int16_MAX = 163838 */
sc_Q16 = SILK_FIX_CONST( 0.999, 16 ) - silk_DIV32( silk_LSHIFT( maxabs - silk_int16_MAX, 14 ),
silk_RSHIFT32( silk_MUL( maxabs, idx + 1), 2 ) );
silk_bwexpander_32( a32_QA1, d, sc_Q16 );
} else {
break;
}
}
if( i == 10 ) {
/* Reached the last iteration, clip the coefficients */
for( i = 0; silk_LPC_inverse_pred_gain( a_Q12, d ) == 0 && i < MAX_LPC_STABILIZE_ITERATIONS; i++ ) {
/* Prediction coefficients are (too close to) unstable; apply bandwidth expansion */
/* on the unscaled coefficients, convert to Q12 and measure again */
silk_bwexpander_32( a32_QA1, d, 65536 - silk_LSHIFT( 2, i ) );
for( k = 0; k < d; k++ ) {
a_Q12[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ) ); /* QA+1 -> Q12 */
a32_QA1[ k ] = silk_LSHIFT( (opus_int32)a_Q12[ k ], QA + 1 - 12 );
}
} else {
for( k = 0; k < d; k++ ) {
a_Q12[ k ] = (opus_int16)silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ); /* QA+1 -> Q12 */
}
}
for( i = 0; i < MAX_LPC_STABILIZE_ITERATIONS; i++ ) {
if( silk_LPC_inverse_pred_gain( a_Q12, d ) < SILK_FIX_CONST( 1.0 / MAX_PREDICTION_POWER_GAIN, 30 ) ) {
/* Prediction coefficients are (too close to) unstable; apply bandwidth expansion */
/* on the unscaled coefficients, convert to Q12 and measure again */
silk_bwexpander_32( a32_QA1, d, 65536 - silk_LSHIFT( 2, i ) );
for( k = 0; k < d; k++ ) {
a_Q12[ k ] = (opus_int16)silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ); /* QA+1 -> Q12 */
}
} else {
break;
a_Q12[ k ] = (opus_int16)silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ); /* QA+1 -> Q12 */
}
}
}

View File

@ -33,36 +33,44 @@ POSSIBILITY OF SUCH DAMAGE.
/* Compute quantization errors for an LPC_order element input vector for a VQ codebook */
void silk_NLSF_VQ(
opus_int32 err_Q26[], /* O Quantization errors [K] */
opus_int32 err_Q24[], /* O Quantization errors [K] */
const opus_int16 in_Q15[], /* I Input vectors to be quantized [LPC_order] */
const opus_uint8 pCB_Q8[], /* I Codebook vectors [K*LPC_order] */
const opus_int16 pWght_Q9[], /* I Codebook weights [K*LPC_order] */
const opus_int K, /* I Number of codebook vectors */
const opus_int LPC_order /* I Number of LPCs */
)
{
opus_int i, m;
opus_int32 diff_Q15, sum_error_Q30, sum_error_Q26;
opus_int i, m;
opus_int32 diff_Q15, diffw_Q24, sum_error_Q24, pred_Q24;
const opus_int16 *w_Q9_ptr;
const opus_uint8 *cb_Q8_ptr;
silk_assert( LPC_order <= 16 );
silk_assert( ( LPC_order & 1 ) == 0 );
/* Loop over codebook */
cb_Q8_ptr = pCB_Q8;
w_Q9_ptr = pWght_Q9;
for( i = 0; i < K; i++ ) {
sum_error_Q26 = 0;
for( m = 0; m < LPC_order; m += 2 ) {
/* Compute weighted squared quantization error for index m */
diff_Q15 = silk_SUB_LSHIFT32( in_Q15[ m ], (opus_int32)*pCB_Q8++, 7 ); /* range: [ -32767 : 32767 ]*/
sum_error_Q30 = silk_SMULBB( diff_Q15, diff_Q15 );
sum_error_Q24 = 0;
pred_Q24 = 0;
for( m = LPC_order-2; m >= 0; m -= 2 ) {
/* Compute weighted absolute predictive quantization error for index m + 1 */
diff_Q15 = silk_SUB_LSHIFT32( in_Q15[ m + 1 ], (opus_int32)cb_Q8_ptr[ m + 1 ], 7 ); /* range: [ -32767 : 32767 ]*/
diffw_Q24 = silk_SMULBB( diff_Q15, w_Q9_ptr[ m + 1 ] );
sum_error_Q24 = silk_ADD32( sum_error_Q24, silk_abs( silk_SUB_RSHIFT32( diffw_Q24, pred_Q24, 1 ) ) );
pred_Q24 = diffw_Q24;
/* Compute weighted squared quantization error for index m + 1 */
diff_Q15 = silk_SUB_LSHIFT32( in_Q15[m + 1], (opus_int32)*pCB_Q8++, 7 ); /* range: [ -32767 : 32767 ]*/
sum_error_Q30 = silk_SMLABB( sum_error_Q30, diff_Q15, diff_Q15 );
/* Compute weighted absolute predictive quantization error for index m */
diff_Q15 = silk_SUB_LSHIFT32( in_Q15[ m ], (opus_int32)cb_Q8_ptr[ m ], 7 ); /* range: [ -32767 : 32767 ]*/
diffw_Q24 = silk_SMULBB( diff_Q15, w_Q9_ptr[ m ] );
sum_error_Q24 = silk_ADD32( sum_error_Q24, silk_abs( silk_SUB_RSHIFT32( diffw_Q24, pred_Q24, 1 ) ) );
pred_Q24 = diffw_Q24;
sum_error_Q26 = silk_ADD_RSHIFT32( sum_error_Q26, sum_error_Q30, 4 );
silk_assert( sum_error_Q26 >= 0 );
silk_assert( sum_error_Q30 >= 0 );
silk_assert( sum_error_Q24 >= 0 );
}
err_Q26[ i ] = sum_error_Q26;
err_Q24[ i ] = sum_error_Q24;
cb_Q8_ptr += LPC_order;
w_Q9_ptr += LPC_order;
}
}

View File

@ -32,7 +32,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "main.h"
/* Predictive dequantizer for NLSF residuals */
static OPUS_INLINE void silk_NLSF_residual_dequant( /* O Returns RD value in Q30 */
static OPUS_INLINE void silk_NLSF_residual_dequant( /* O Returns RD value in Q30 */
opus_int16 x_Q10[], /* O Output [ order ] */
const opus_int8 indices[], /* I Quantization indices [ order ] */
const opus_uint8 pred_coef_Q8[], /* I Backward predictor coefs [ order ] */
@ -70,15 +70,9 @@ void silk_NLSF_decode(
opus_uint8 pred_Q8[ MAX_LPC_ORDER ];
opus_int16 ec_ix[ MAX_LPC_ORDER ];
opus_int16 res_Q10[ MAX_LPC_ORDER ];
opus_int16 W_tmp_QW[ MAX_LPC_ORDER ];
opus_int32 W_tmp_Q9, NLSF_Q15_tmp;
opus_int32 NLSF_Q15_tmp;
const opus_uint8 *pCB_element;
/* Decode first stage */
pCB_element = &psNLSF_CB->CB1_NLSF_Q8[ NLSFIndices[ 0 ] * psNLSF_CB->order ];
for( i = 0; i < psNLSF_CB->order; i++ ) {
pNLSF_Q15[ i ] = silk_LSHIFT( (opus_int16)pCB_element[ i ], 7 );
}
const opus_int16 *pCB_Wght_Q9;
/* Unpack entropy table indices and predictor for current CB1 index */
silk_NLSF_unpack( ec_ix, pred_Q8, psNLSF_CB, NLSFIndices[ 0 ] );
@ -86,13 +80,11 @@ void silk_NLSF_decode(
/* Predictive residual dequantizer */
silk_NLSF_residual_dequant( res_Q10, &NLSFIndices[ 1 ], pred_Q8, psNLSF_CB->quantStepSize_Q16, psNLSF_CB->order );
/* Weights from codebook vector */
silk_NLSF_VQ_weights_laroia( W_tmp_QW, pNLSF_Q15, psNLSF_CB->order );
/* Apply inverse square-rooted weights and add to output */
/* Apply inverse square-rooted weights to first stage and add to output */
pCB_element = &psNLSF_CB->CB1_NLSF_Q8[ NLSFIndices[ 0 ] * psNLSF_CB->order ];
pCB_Wght_Q9 = &psNLSF_CB->CB1_Wght_Q9[ NLSFIndices[ 0 ] * psNLSF_CB->order ];
for( i = 0; i < psNLSF_CB->order; i++ ) {
W_tmp_Q9 = silk_SQRT_APPROX( silk_LSHIFT( (opus_int32)W_tmp_QW[ i ], 18 - NLSF_W_Q ) );
NLSF_Q15_tmp = silk_ADD32( pNLSF_Q15[ i ], silk_DIV32_16( silk_LSHIFT( (opus_int32)res_Q10[ i ], 14 ), W_tmp_Q9 ) );
NLSF_Q15_tmp = silk_ADD_LSHIFT32( silk_DIV32_16( silk_LSHIFT( (opus_int32)res_Q10[ i ], 14 ), pCB_Wght_Q9[ i ] ), (opus_int16)pCB_element[ i ], 7 );
pNLSF_Q15[ i ] = (opus_int16)silk_LIMIT( NLSF_Q15_tmp, 0, 32767 );
}

View File

@ -46,8 +46,9 @@ opus_int32 silk_NLSF_del_dec_quant( /* O Returns
)
{
opus_int i, j, nStates, ind_tmp, ind_min_max, ind_max_min, in_Q10, res_Q10;
opus_int pred_Q10, diff_Q10, out0_Q10, out1_Q10, rate0_Q5, rate1_Q5;
opus_int32 RD_tmp_Q25, min_Q25, min_max_Q25, max_min_Q25, pred_coef_Q16;
opus_int pred_Q10, diff_Q10, rate0_Q5, rate1_Q5;
opus_int16 out0_Q10, out1_Q10;
opus_int32 RD_tmp_Q25, min_Q25, min_max_Q25, max_min_Q25;
opus_int ind_sort[ NLSF_QUANT_DEL_DEC_STATES ];
opus_int8 ind[ NLSF_QUANT_DEL_DEC_STATES ][ MAX_LPC_ORDER ];
opus_int16 prev_out_Q10[ 2 * NLSF_QUANT_DEL_DEC_STATES ];
@ -56,38 +57,47 @@ opus_int32 silk_NLSF_del_dec_quant( /* O Returns
opus_int32 RD_max_Q25[ NLSF_QUANT_DEL_DEC_STATES ];
const opus_uint8 *rates_Q5;
opus_int out0_Q10_table[2 * NLSF_QUANT_MAX_AMPLITUDE_EXT];
opus_int out1_Q10_table[2 * NLSF_QUANT_MAX_AMPLITUDE_EXT];
for (i = -NLSF_QUANT_MAX_AMPLITUDE_EXT; i <= NLSF_QUANT_MAX_AMPLITUDE_EXT-1; i++)
{
out0_Q10 = silk_LSHIFT( i, 10 );
out1_Q10 = silk_ADD16( out0_Q10, 1024 );
if( i > 0 ) {
out0_Q10 = silk_SUB16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
} else if( i == 0 ) {
out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
} else if( i == -1 ) {
out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
} else {
out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
out1_Q10 = silk_ADD16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
}
out0_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_RSHIFT( silk_SMULBB( out0_Q10, quant_step_size_Q16 ), 16 );
out1_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_RSHIFT( silk_SMULBB( out1_Q10, quant_step_size_Q16 ), 16 );
}
silk_assert( (NLSF_QUANT_DEL_DEC_STATES & (NLSF_QUANT_DEL_DEC_STATES-1)) == 0 ); /* must be power of two */
nStates = 1;
RD_Q25[ 0 ] = 0;
prev_out_Q10[ 0 ] = 0;
for( i = order - 1; ; i-- ) {
for( i = order - 1; i >= 0; i-- ) {
rates_Q5 = &ec_rates_Q5[ ec_ix[ i ] ];
pred_coef_Q16 = silk_LSHIFT( (opus_int32)pred_coef_Q8[ i ], 8 );
in_Q10 = x_Q10[ i ];
for( j = 0; j < nStates; j++ ) {
pred_Q10 = silk_SMULWB( pred_coef_Q16, prev_out_Q10[ j ] );
pred_Q10 = silk_RSHIFT( silk_SMULBB( (opus_int16)pred_coef_Q8[ i ], prev_out_Q10[ j ] ), 8 );
res_Q10 = silk_SUB16( in_Q10, pred_Q10 );
ind_tmp = silk_SMULWB( (opus_int32)inv_quant_step_size_Q6, res_Q10 );
ind_tmp = silk_RSHIFT( silk_SMULBB( inv_quant_step_size_Q6, res_Q10 ), 16 );
ind_tmp = silk_LIMIT( ind_tmp, -NLSF_QUANT_MAX_AMPLITUDE_EXT, NLSF_QUANT_MAX_AMPLITUDE_EXT-1 );
ind[ j ][ i ] = (opus_int8)ind_tmp;
/* compute outputs for ind_tmp and ind_tmp + 1 */
out0_Q10 = silk_LSHIFT( ind_tmp, 10 );
out1_Q10 = silk_ADD16( out0_Q10, 1024 );
if( ind_tmp > 0 ) {
out0_Q10 = silk_SUB16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
} else if( ind_tmp == 0 ) {
out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
} else if( ind_tmp == -1 ) {
out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
} else {
out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
out1_Q10 = silk_ADD16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
}
out0_Q10 = silk_SMULWB( (opus_int32)out0_Q10, quant_step_size_Q16 );
out1_Q10 = silk_SMULWB( (opus_int32)out1_Q10, quant_step_size_Q16 );
out0_Q10 = out0_Q10_table[ ind_tmp + NLSF_QUANT_MAX_AMPLITUDE_EXT ];
out1_Q10 = out1_Q10_table[ ind_tmp + NLSF_QUANT_MAX_AMPLITUDE_EXT ];
out0_Q10 = silk_ADD16( out0_Q10, pred_Q10 );
out1_Q10 = silk_ADD16( out1_Q10, pred_Q10 );
prev_out_Q10[ j ] = out0_Q10;
@ -121,7 +131,7 @@ opus_int32 silk_NLSF_del_dec_quant( /* O Returns
RD_Q25[ j + nStates ] = silk_SMLABB( silk_MLA( RD_tmp_Q25, silk_SMULBB( diff_Q10, diff_Q10 ), w_Q5[ i ] ), mu_Q20, rate1_Q5 );
}
if( nStates <= ( NLSF_QUANT_DEL_DEC_STATES >> 1 ) ) {
if( silk_LSHIFT( nStates, 1 ) <= NLSF_QUANT_DEL_DEC_STATES ) {
/* double number of states and copy */
for( j = 0; j < nStates; j++ ) {
ind[ j + nStates ][ i ] = ind[ j ][ i ] + 1;
@ -130,7 +140,7 @@ opus_int32 silk_NLSF_del_dec_quant( /* O Returns
for( j = nStates; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) {
ind[ j ][ i ] = ind[ j - nStates ][ i ];
}
} else if( i > 0 ) {
} else {
/* sort lower and upper half of RD_Q25, pairwise */
for( j = 0; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) {
if( RD_Q25[ j ] > RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ] ) {
@ -181,8 +191,6 @@ opus_int32 silk_NLSF_del_dec_quant( /* O Returns
for( j = 0; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) {
ind[ j ][ i ] += silk_RSHIFT( ind_sort[ j ], NLSF_QUANT_DEL_DEC_STATES_LOG2 );
}
} else { /* i == 0 */
break;
}
}

View File

@ -37,31 +37,29 @@ POSSIBILITY OF SUCH DAMAGE.
/***********************/
opus_int32 silk_NLSF_encode( /* O Returns RD value in Q25 */
opus_int8 *NLSFIndices, /* I Codebook path vector [ LPC_ORDER + 1 ] */
opus_int16 *pNLSF_Q15, /* I/O Quantized NLSF vector [ LPC_ORDER ] */
opus_int16 *pNLSF_Q15, /* I/O (Un)quantized NLSF vector [ LPC_ORDER ] */
const silk_NLSF_CB_struct *psNLSF_CB, /* I Codebook object */
const opus_int16 *pW_QW, /* I NLSF weight vector [ LPC_ORDER ] */
const opus_int16 *pW_Q2, /* I NLSF weight vector [ LPC_ORDER ] */
const opus_int NLSF_mu_Q20, /* I Rate weight for the RD optimization */
const opus_int nSurvivors, /* I Max survivors after first stage */
const opus_int signalType /* I Signal type: 0/1/2 */
)
{
opus_int i, s, ind1, bestIndex, prob_Q8, bits_q7;
opus_int32 W_tmp_Q9;
VARDECL( opus_int32, err_Q26 );
opus_int32 W_tmp_Q9, ret;
VARDECL( opus_int32, err_Q24 );
VARDECL( opus_int32, RD_Q25 );
VARDECL( opus_int, tempIndices1 );
VARDECL( opus_int8, tempIndices2 );
opus_int16 res_Q15[ MAX_LPC_ORDER ];
opus_int16 res_Q10[ MAX_LPC_ORDER ];
opus_int16 NLSF_tmp_Q15[ MAX_LPC_ORDER ];
opus_int16 W_tmp_QW[ MAX_LPC_ORDER ];
opus_int16 W_adj_Q5[ MAX_LPC_ORDER ];
opus_uint8 pred_Q8[ MAX_LPC_ORDER ];
opus_int16 ec_ix[ MAX_LPC_ORDER ];
const opus_uint8 *pCB_element, *iCDF_ptr;
const opus_int16 *pCB_Wght_Q9;
SAVE_STACK;
silk_assert( nSurvivors <= NLSF_VQ_MAX_SURVIVORS );
silk_assert( signalType >= 0 && signalType <= 2 );
silk_assert( NLSF_mu_Q20 <= 32767 && NLSF_mu_Q20 >= 0 );
@ -69,12 +67,12 @@ opus_int32 silk_NLSF_encode( /* O Returns
silk_NLSF_stabilize( pNLSF_Q15, psNLSF_CB->deltaMin_Q15, psNLSF_CB->order );
/* First stage: VQ */
ALLOC( err_Q26, psNLSF_CB->nVectors, opus_int32 );
silk_NLSF_VQ( err_Q26, pNLSF_Q15, psNLSF_CB->CB1_NLSF_Q8, psNLSF_CB->nVectors, psNLSF_CB->order );
ALLOC( err_Q24, psNLSF_CB->nVectors, opus_int32 );
silk_NLSF_VQ( err_Q24, pNLSF_Q15, psNLSF_CB->CB1_NLSF_Q8, psNLSF_CB->CB1_Wght_Q9, psNLSF_CB->nVectors, psNLSF_CB->order );
/* Sort the quantization errors */
ALLOC( tempIndices1, nSurvivors, opus_int );
silk_insertion_sort_increasing( err_Q26, tempIndices1, psNLSF_CB->nVectors, nSurvivors );
silk_insertion_sort_increasing( err_Q24, tempIndices1, psNLSF_CB->nVectors, nSurvivors );
ALLOC( RD_Q25, nSurvivors, opus_int32 );
ALLOC( tempIndices2, nSurvivors * MAX_LPC_ORDER, opus_int8 );
@ -85,23 +83,12 @@ opus_int32 silk_NLSF_encode( /* O Returns
/* Residual after first stage */
pCB_element = &psNLSF_CB->CB1_NLSF_Q8[ ind1 * psNLSF_CB->order ];
pCB_Wght_Q9 = &psNLSF_CB->CB1_Wght_Q9[ ind1 * psNLSF_CB->order ];
for( i = 0; i < psNLSF_CB->order; i++ ) {
NLSF_tmp_Q15[ i ] = silk_LSHIFT16( (opus_int16)pCB_element[ i ], 7 );
res_Q15[ i ] = pNLSF_Q15[ i ] - NLSF_tmp_Q15[ i ];
}
/* Weights from codebook vector */
silk_NLSF_VQ_weights_laroia( W_tmp_QW, NLSF_tmp_Q15, psNLSF_CB->order );
/* Apply square-rooted weights */
for( i = 0; i < psNLSF_CB->order; i++ ) {
W_tmp_Q9 = silk_SQRT_APPROX( silk_LSHIFT( (opus_int32)W_tmp_QW[ i ], 18 - NLSF_W_Q ) );
res_Q10[ i ] = (opus_int16)silk_RSHIFT( silk_SMULBB( res_Q15[ i ], W_tmp_Q9 ), 14 );
}
/* Modify input weights accordingly */
for( i = 0; i < psNLSF_CB->order; i++ ) {
W_adj_Q5[ i ] = silk_DIV32_16( silk_LSHIFT( (opus_int32)pW_QW[ i ], 5 ), W_tmp_QW[ i ] );
W_tmp_Q9 = pCB_Wght_Q9[ i ];
res_Q10[ i ] = (opus_int16)silk_RSHIFT( silk_SMULBB( pNLSF_Q15[ i ] - NLSF_tmp_Q15[ i ], W_tmp_Q9 ), 14 );
W_adj_Q5[ i ] = silk_DIV32_varQ( (opus_int32)pW_Q2[ i ], silk_SMULBB( W_tmp_Q9, W_tmp_Q9 ), 21 );
}
/* Unpack entropy table indices and predictor for current CB1 index */
@ -131,6 +118,7 @@ opus_int32 silk_NLSF_encode( /* O Returns
/* Decode */
silk_NLSF_decode( pNLSF_Q15, NLSFIndices, psNLSF_CB );
ret = RD_Q25[ 0 ];
RESTORE_STACK;
return RD_Q25[ 0 ];
return ret;
}

View File

@ -130,7 +130,7 @@ void silk_NLSF_stabilize(
/* Keep delta_min distance between the NLSFs */
for( i = 1; i < L; i++ )
NLSF_Q15[i] = silk_max_int( NLSF_Q15[i], NLSF_Q15[i-1] + NDeltaMin_Q15[i] );
NLSF_Q15[i] = silk_max_int( NLSF_Q15[i], silk_ADD_SAT16( NLSF_Q15[i-1], NDeltaMin_Q15[i] ) );
/* Last NLSF should be no higher than 1 - NDeltaMin[L] */
NLSF_Q15[L-1] = silk_min_int( NLSF_Q15[L-1], (1<<15) - NDeltaMin_Q15[L] );

View File

@ -31,11 +31,13 @@ POSSIBILITY OF SUCH DAMAGE.
#include "main.h"
#include "stack_alloc.h"
#include "NSQ.h"
static OPUS_INLINE void silk_nsq_scale_states(
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
const opus_int32 x_Q3[], /* I input in Q3 */
const opus_int16 x16[], /* I input */
opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */
const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */
opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */
@ -46,6 +48,7 @@ static OPUS_INLINE void silk_nsq_scale_states(
const opus_int signal_type /* I Signal type */
);
#if !defined(OPUS_X86_MAY_HAVE_SSE4_1)
static OPUS_INLINE void silk_noise_shape_quantizer(
silk_nsq_state *NSQ, /* I/O NSQ state */
opus_int signalType, /* I Signal type */
@ -65,18 +68,21 @@ static OPUS_INLINE void silk_noise_shape_quantizer(
opus_int offset_Q10, /* I */
opus_int length, /* I Input length */
opus_int shapingLPCOrder, /* I Noise shaping AR filter order */
opus_int predictLPCOrder /* I Prediction filter order */
opus_int predictLPCOrder, /* I Prediction filter order */
int arch /* I Architecture */
);
#endif
void silk_NSQ(
void silk_NSQ_c
(
const silk_encoder_state *psEncC, /* I/O Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int32 x_Q3[], /* I Prefiltered input signal */
const opus_int16 x16[], /* I Input */
opus_int8 pulses[], /* O Quantized pulse signal */
const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */
const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */
@ -111,8 +117,7 @@ void silk_NSQ(
LSF_interpolation_flag = 1;
}
ALLOC( sLTP_Q15,
psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 );
ALLOC( sLTP_Q15, psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 );
ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 );
ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 );
/* Set up pointers to start of sub frame */
@ -122,7 +127,7 @@ void silk_NSQ(
for( k = 0; k < psEncC->nb_subfr; k++ ) {
A_Q12 = &PredCoef_Q12[ (( k >> 1 ) | ( 1 - LSF_interpolation_flag )) * MAX_LPC_ORDER ];
B_Q14 = &LTPCoef_Q14[ k * LTP_ORDER ];
AR_shp_Q13 = &AR2_Q13[ k * MAX_SHAPE_LPC_ORDER ];
AR_shp_Q13 = &AR_Q13[ k * MAX_SHAPE_LPC_ORDER ];
/* Noise shape parameters */
silk_assert( HarmShapeGain_Q14[ k ] >= 0 );
@ -141,20 +146,20 @@ void silk_NSQ(
silk_assert( start_idx > 0 );
silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ],
A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder );
A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch );
NSQ->rewhite_flag = 1;
NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
}
}
silk_nsq_scale_states( psEncC, NSQ, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType );
silk_nsq_scale_states( psEncC, NSQ, x16, x_sc_Q10, sLTP, sLTP_Q15, k, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType );
silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14,
AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10,
offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder );
offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder, psEncC->arch );
x_Q3 += psEncC->subfr_length;
x16 += psEncC->subfr_length;
pulses += psEncC->subfr_length;
pxq += psEncC->subfr_length;
}
@ -172,7 +177,11 @@ void silk_NSQ(
/***********************************/
/* silk_noise_shape_quantizer */
/***********************************/
static OPUS_INLINE void silk_noise_shape_quantizer(
#if !defined(OPUS_X86_MAY_HAVE_SSE4_1)
static OPUS_INLINE
#endif
void silk_noise_shape_quantizer(
silk_nsq_state *NSQ, /* I/O NSQ state */
opus_int signalType, /* I Signal type */
const opus_int32 x_sc_Q10[], /* I */
@ -191,15 +200,19 @@ static OPUS_INLINE void silk_noise_shape_quantizer(
opus_int offset_Q10, /* I */
opus_int length, /* I Input length */
opus_int shapingLPCOrder, /* I Noise shaping AR filter order */
opus_int predictLPCOrder /* I Prediction filter order */
opus_int predictLPCOrder, /* I Prediction filter order */
int arch /* I Architecture */
)
{
opus_int i, j;
opus_int i;
opus_int32 LTP_pred_Q13, LPC_pred_Q10, n_AR_Q12, n_LTP_Q13;
opus_int32 n_LF_Q12, r_Q10, rr_Q10, q1_Q0, q1_Q10, q2_Q10, rd1_Q20, rd2_Q20;
opus_int32 exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
opus_int32 tmp1, tmp2, sLF_AR_shp_Q14;
opus_int32 *psLPC_Q14, *shp_lag_ptr, *pred_lag_ptr;
#ifdef silk_short_prediction_create_arch_coef
opus_int32 a_Q12_arch[MAX_LPC_ORDER];
#endif
shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];
pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
@ -208,32 +221,16 @@ static OPUS_INLINE void silk_noise_shape_quantizer(
/* Set up short term AR state */
psLPC_Q14 = &NSQ->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 ];
#ifdef silk_short_prediction_create_arch_coef
silk_short_prediction_create_arch_coef(a_Q12_arch, a_Q12, predictLPCOrder);
#endif
for( i = 0; i < length; i++ ) {
/* Generate dither */
NSQ->rand_seed = silk_RAND( NSQ->rand_seed );
/* Short-term prediction */
silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 );
/* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
LPC_pred_Q10 = silk_RSHIFT( predictLPCOrder, 1 );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ 0 ], a_Q12[ 0 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -1 ], a_Q12[ 1 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -2 ], a_Q12[ 2 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -3 ], a_Q12[ 3 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -4 ], a_Q12[ 4 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -5 ], a_Q12[ 5 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -6 ], a_Q12[ 6 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -7 ], a_Q12[ 7 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -8 ], a_Q12[ 8 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -9 ], a_Q12[ 9 ] );
if( predictLPCOrder == 16 ) {
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -10 ], a_Q12[ 10 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -11 ], a_Q12[ 11 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -12 ], a_Q12[ 12 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -13 ], a_Q12[ 13 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -14 ], a_Q12[ 14 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -15 ], a_Q12[ 15 ] );
}
LPC_pred_Q10 = silk_noise_shape_quantizer_short_prediction(psLPC_Q14, a_Q12, a_Q12_arch, predictLPCOrder, arch);
/* Long-term prediction */
if( signalType == TYPE_VOICED ) {
@ -252,23 +249,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer(
/* Noise shape feedback */
silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */
tmp2 = psLPC_Q14[ 0 ];
tmp1 = NSQ->sAR2_Q14[ 0 ];
NSQ->sAR2_Q14[ 0 ] = tmp2;
n_AR_Q12 = silk_RSHIFT( shapingLPCOrder, 1 );
n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp2, AR_shp_Q13[ 0 ] );
for( j = 2; j < shapingLPCOrder; j += 2 ) {
tmp2 = NSQ->sAR2_Q14[ j - 1 ];
NSQ->sAR2_Q14[ j - 1 ] = tmp1;
n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp1, AR_shp_Q13[ j - 1 ] );
tmp1 = NSQ->sAR2_Q14[ j + 0 ];
NSQ->sAR2_Q14[ j + 0 ] = tmp2;
n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp2, AR_shp_Q13[ j ] );
}
NSQ->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1;
n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] );
n_AR_Q12 = silk_NSQ_noise_shape_feedback_loop(&NSQ->sDiff_shp_Q14, NSQ->sAR2_Q14, AR_shp_Q13, shapingLPCOrder, arch);
n_AR_Q12 = silk_LSHIFT32( n_AR_Q12, 1 ); /* Q11 -> Q12 */
n_AR_Q12 = silk_SMLAWB( n_AR_Q12, NSQ->sLF_AR_shp_Q14, Tilt_Q14 );
n_LF_Q12 = silk_SMULWB( NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - 1 ], LF_shp_Q14 );
@ -296,14 +278,27 @@ static OPUS_INLINE void silk_noise_shape_quantizer(
r_Q10 = silk_SUB32( x_sc_Q10[ i ], tmp1 ); /* residual error Q10 */
/* Flip sign depending on dither */
if ( NSQ->rand_seed < 0 ) {
r_Q10 = -r_Q10;
if( NSQ->rand_seed < 0 ) {
r_Q10 = -r_Q10;
}
r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 );
/* Find two quantization level candidates and measure their rate-distortion */
q1_Q10 = silk_SUB32( r_Q10, offset_Q10 );
q1_Q0 = silk_RSHIFT( q1_Q10, 10 );
if (Lambda_Q10 > 2048) {
/* For aggressive RDO, the bias becomes more than one pulse. */
int rdo_offset = Lambda_Q10/2 - 512;
if (q1_Q10 > rdo_offset) {
q1_Q0 = silk_RSHIFT( q1_Q10 - rdo_offset, 10 );
} else if (q1_Q10 < -rdo_offset) {
q1_Q0 = silk_RSHIFT( q1_Q10 + rdo_offset, 10 );
} else if (q1_Q10 < 0) {
q1_Q0 = -1;
} else {
q1_Q0 = 0;
}
}
if( q1_Q0 > 0 ) {
q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 );
@ -354,7 +349,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer(
/* Update states */
psLPC_Q14++;
*psLPC_Q14 = xq_Q14;
sLF_AR_shp_Q14 = silk_SUB_LSHIFT32( xq_Q14, n_AR_Q12, 2 );
NSQ->sDiff_shp_Q14 = silk_SUB_LSHIFT32( xq_Q14, x_sc_Q10[ i ], 4 );
sLF_AR_shp_Q14 = silk_SUB_LSHIFT32( NSQ->sDiff_shp_Q14, n_AR_Q12, 2 );
NSQ->sLF_AR_shp_Q14 = sLF_AR_shp_Q14;
NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx ] = silk_SUB_LSHIFT32( sLF_AR_shp_Q14, n_LF_Q12, 2 );
@ -373,7 +369,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer(
static OPUS_INLINE void silk_nsq_scale_states(
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
const opus_int32 x_Q3[], /* I input in Q3 */
const opus_int16 x16[], /* I input */
opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */
const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */
opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */
@ -385,28 +381,18 @@ static OPUS_INLINE void silk_nsq_scale_states(
)
{
opus_int i, lag;
opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23;
opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q26;
lag = pitchL[ subfr ];
inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 );
silk_assert( inv_gain_Q31 != 0 );
/* Calculate gain adjustment factor */
if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) {
gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 );
} else {
gain_adj_Q16 = (opus_int32)1 << 16;
}
/* Scale input */
inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 );
inv_gain_Q26 = silk_RSHIFT_ROUND( inv_gain_Q31, 5 );
for( i = 0; i < psEncC->subfr_length; i++ ) {
x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 );
x_sc_Q10[ i ] = silk_SMULWW( x16[ i ], inv_gain_Q26 );
}
/* Save inverse gain */
NSQ->prev_gain_Q16 = Gains_Q16[ subfr ];
/* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */
if( NSQ->rewhite_flag ) {
if( subfr == 0 ) {
@ -420,7 +406,9 @@ static OPUS_INLINE void silk_nsq_scale_states(
}
/* Adjust for changing gain */
if( gain_adj_Q16 != (opus_int32)1 << 16 ) {
if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) {
gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 );
/* Scale long-term shaping state */
for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx; i++ ) {
NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] );
@ -434,6 +422,7 @@ static OPUS_INLINE void silk_nsq_scale_states(
}
NSQ->sLF_AR_shp_Q14 = silk_SMULWW( gain_adj_Q16, NSQ->sLF_AR_shp_Q14 );
NSQ->sDiff_shp_Q14 = silk_SMULWW( gain_adj_Q16, NSQ->sDiff_shp_Q14 );
/* Scale short-term prediction and shaping states */
for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) {
@ -442,5 +431,8 @@ static OPUS_INLINE void silk_nsq_scale_states(
for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) {
NSQ->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sAR2_Q14[ i ] );
}
/* Save inverse gain */
NSQ->prev_gain_Q16 = Gains_Q16[ subfr ];
}
}

View File

@ -0,0 +1,101 @@
/***********************************************************************
Copyright (c) 2014 Vidyo.
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifndef SILK_NSQ_H
#define SILK_NSQ_H
#include "SigProc_FIX.h"
#undef silk_short_prediction_create_arch_coef
static OPUS_INLINE opus_int32 silk_noise_shape_quantizer_short_prediction_c(const opus_int32 *buf32, const opus_int16 *coef16, opus_int order)
{
opus_int32 out;
silk_assert( order == 10 || order == 16 );
/* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
out = silk_RSHIFT( order, 1 );
out = silk_SMLAWB( out, buf32[ 0 ], coef16[ 0 ] );
out = silk_SMLAWB( out, buf32[ -1 ], coef16[ 1 ] );
out = silk_SMLAWB( out, buf32[ -2 ], coef16[ 2 ] );
out = silk_SMLAWB( out, buf32[ -3 ], coef16[ 3 ] );
out = silk_SMLAWB( out, buf32[ -4 ], coef16[ 4 ] );
out = silk_SMLAWB( out, buf32[ -5 ], coef16[ 5 ] );
out = silk_SMLAWB( out, buf32[ -6 ], coef16[ 6 ] );
out = silk_SMLAWB( out, buf32[ -7 ], coef16[ 7 ] );
out = silk_SMLAWB( out, buf32[ -8 ], coef16[ 8 ] );
out = silk_SMLAWB( out, buf32[ -9 ], coef16[ 9 ] );
if( order == 16 )
{
out = silk_SMLAWB( out, buf32[ -10 ], coef16[ 10 ] );
out = silk_SMLAWB( out, buf32[ -11 ], coef16[ 11 ] );
out = silk_SMLAWB( out, buf32[ -12 ], coef16[ 12 ] );
out = silk_SMLAWB( out, buf32[ -13 ], coef16[ 13 ] );
out = silk_SMLAWB( out, buf32[ -14 ], coef16[ 14 ] );
out = silk_SMLAWB( out, buf32[ -15 ], coef16[ 15 ] );
}
return out;
}
#define silk_noise_shape_quantizer_short_prediction(in, coef, coefRev, order, arch) ((void)arch,silk_noise_shape_quantizer_short_prediction_c(in, coef, order))
static OPUS_INLINE opus_int32 silk_NSQ_noise_shape_feedback_loop_c(const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef, opus_int order)
{
opus_int32 out;
opus_int32 tmp1, tmp2;
opus_int j;
tmp2 = data0[0];
tmp1 = data1[0];
data1[0] = tmp2;
out = silk_RSHIFT(order, 1);
out = silk_SMLAWB(out, tmp2, coef[0]);
for (j = 2; j < order; j += 2) {
tmp2 = data1[j - 1];
data1[j - 1] = tmp1;
out = silk_SMLAWB(out, tmp1, coef[j - 1]);
tmp1 = data1[j + 0];
data1[j + 0] = tmp2;
out = silk_SMLAWB(out, tmp2, coef[j]);
}
data1[order - 1] = tmp1;
out = silk_SMLAWB(out, tmp1, coef[order - 1]);
/* Q11 -> Q12 */
out = silk_LSHIFT32( out, 1 );
return out;
}
#define silk_NSQ_noise_shape_feedback_loop(data0, data1, coef, order, arch) ((void)arch,silk_NSQ_noise_shape_feedback_loop_c(data0, data1, coef, order))
#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
#include "arm/NSQ_neon.h"
#endif
#endif /* SILK_NSQ_H */

View File

@ -31,6 +31,8 @@ POSSIBILITY OF SUCH DAMAGE.
#include "main.h"
#include "stack_alloc.h"
#include "NSQ.h"
typedef struct {
opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ];
@ -41,6 +43,7 @@ typedef struct {
opus_int32 Shape_Q14[ DECISION_DELAY ];
opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ];
opus_int32 LF_AR_Q14;
opus_int32 Diff_Q14;
opus_int32 Seed;
opus_int32 SeedInit;
opus_int32 RD_Q10;
@ -51,17 +54,21 @@ typedef struct {
opus_int32 RD_Q10;
opus_int32 xq_Q14;
opus_int32 LF_AR_Q14;
opus_int32 Diff_Q14;
opus_int32 sLTP_shp_Q14;
opus_int32 LPC_exc_Q14;
} NSQ_sample_struct;
typedef NSQ_sample_struct NSQ_sample_pair[ 2 ];
#if defined(MIPSr1_ASM)
#include "mips/NSQ_del_dec_mipsr1.h"
#endif
static OPUS_INLINE void silk_nsq_del_dec_scale_states(
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */
const opus_int32 x_Q3[], /* I Input in Q3 */
const opus_int16 x16[], /* I Input */
opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */
const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */
opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */
@ -103,18 +110,19 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
opus_int warping_Q16, /* I */
opus_int nStatesDelayedDecision, /* I Number of states in decision tree */
opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */
opus_int decisionDelay /* I */
opus_int decisionDelay, /* I */
int arch /* I */
);
void silk_NSQ_del_dec(
void silk_NSQ_del_dec_c(
const silk_encoder_state *psEncC, /* I/O Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int32 x_Q3[], /* I Prefiltered input signal */
const opus_int16 x16[], /* I Input */
opus_int8 pulses[], /* O Quantized pulse signal */
const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */
const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */
@ -153,6 +161,7 @@ void silk_NSQ_del_dec(
psDD->SeedInit = psDD->Seed;
psDD->RD_Q10 = 0;
psDD->LF_AR_Q14 = NSQ->sLF_AR_shp_Q14;
psDD->Diff_Q14 = NSQ->sDiff_shp_Q14;
psDD->Shape_Q14[ 0 ] = NSQ->sLTP_shp_Q14[ psEncC->ltp_mem_length - 1 ];
silk_memcpy( psDD->sLPC_Q14, NSQ->sLPC_Q14, NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
silk_memcpy( psDD->sAR2_Q14, NSQ->sAR2_Q14, sizeof( NSQ->sAR2_Q14 ) );
@ -180,8 +189,7 @@ void silk_NSQ_del_dec(
LSF_interpolation_flag = 1;
}
ALLOC( sLTP_Q15,
psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 );
ALLOC( sLTP_Q15, psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 );
ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 );
ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 );
ALLOC( delayedGain_Q10, DECISION_DELAY, opus_int32 );
@ -193,7 +201,7 @@ void silk_NSQ_del_dec(
for( k = 0; k < psEncC->nb_subfr; k++ ) {
A_Q12 = &PredCoef_Q12[ ( ( k >> 1 ) | ( 1 - LSF_interpolation_flag ) ) * MAX_LPC_ORDER ];
B_Q14 = &LTPCoef_Q14[ k * LTP_ORDER ];
AR_shp_Q13 = &AR2_Q13[ k * MAX_SHAPE_LPC_ORDER ];
AR_shp_Q13 = &AR_Q13[ k * MAX_SHAPE_LPC_ORDER ];
/* Noise shape parameters */
silk_assert( HarmShapeGain_Q14[ k ] >= 0 );
@ -229,7 +237,8 @@ void silk_NSQ_del_dec(
psDD = &psDelDec[ Winner_ind ];
last_smple_idx = smpl_buf_idx + decisionDelay;
for( i = 0; i < decisionDelay; i++ ) {
last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK;
last_smple_idx = ( last_smple_idx - 1 ) % DECISION_DELAY;
if( last_smple_idx < 0 ) last_smple_idx += DECISION_DELAY;
pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gains_Q16[ 1 ] ), 14 ) );
@ -244,22 +253,22 @@ void silk_NSQ_del_dec(
silk_assert( start_idx > 0 );
silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ],
A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder );
A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch );
NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
NSQ->rewhite_flag = 1;
}
}
silk_nsq_del_dec_scale_states( psEncC, NSQ, psDelDec, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k,
silk_nsq_del_dec_scale_states( psEncC, NSQ, psDelDec, x16, x_sc_Q10, sLTP, sLTP_Q15, k,
psEncC->nStatesDelayedDecision, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType, decisionDelay );
silk_noise_shape_quantizer_del_dec( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15,
delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ],
Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder,
psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay );
psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay, psEncC->arch );
x_Q3 += psEncC->subfr_length;
x16 += psEncC->subfr_length;
pulses += psEncC->subfr_length;
pxq += psEncC->subfr_length;
}
@ -280,7 +289,9 @@ void silk_NSQ_del_dec(
last_smple_idx = smpl_buf_idx + decisionDelay;
Gain_Q10 = silk_RSHIFT32( Gains_Q16[ psEncC->nb_subfr - 1 ], 6 );
for( i = 0; i < decisionDelay; i++ ) {
last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK;
last_smple_idx = ( last_smple_idx - 1 ) % DECISION_DELAY;
if( last_smple_idx < 0 ) last_smple_idx += DECISION_DELAY;
pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gain_Q10 ), 8 ) );
@ -291,6 +302,7 @@ void silk_NSQ_del_dec(
/* Update states */
NSQ->sLF_AR_shp_Q14 = psDD->LF_AR_Q14;
NSQ->sDiff_shp_Q14 = psDD->Diff_Q14;
NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ];
/* Save quantized speech signal */
@ -303,6 +315,7 @@ void silk_NSQ_del_dec(
/******************************************/
/* Noise shape quantizer for one subframe */
/******************************************/
#ifndef OVERRIDE_silk_noise_shape_quantizer_del_dec
static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
silk_nsq_state *NSQ, /* I/O NSQ state */
NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */
@ -329,7 +342,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
opus_int warping_Q16, /* I */
opus_int nStatesDelayedDecision, /* I Number of states in decision tree */
opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */
opus_int decisionDelay /* I */
opus_int decisionDelay, /* I */
int arch /* I */
)
{
opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx;
@ -339,6 +353,10 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
opus_int32 q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
opus_int32 tmp1, tmp2, sLF_AR_shp_Q14;
opus_int32 *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14;
#ifdef silk_short_prediction_create_arch_coef
opus_int32 a_Q12_arch[MAX_LPC_ORDER];
#endif
VARDECL( NSQ_sample_pair, psSampleState );
NSQ_del_dec_struct *psDD;
NSQ_sample_struct *psSS;
@ -351,6 +369,10 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 );
#ifdef silk_short_prediction_create_arch_coef
silk_short_prediction_create_arch_coef(a_Q12_arch, a_Q12, predictLPCOrder);
#endif
for( i = 0; i < length; i++ ) {
/* Perform common calculations used in all states */
@ -394,33 +416,13 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
/* Pointer used in short term prediction and shaping */
psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ];
/* Short-term prediction */
silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 );
/* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
LPC_pred_Q14 = silk_RSHIFT( predictLPCOrder, 1 );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ 0 ], a_Q12[ 0 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -1 ], a_Q12[ 1 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -2 ], a_Q12[ 2 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -3 ], a_Q12[ 3 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -4 ], a_Q12[ 4 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -5 ], a_Q12[ 5 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -6 ], a_Q12[ 6 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -7 ], a_Q12[ 7 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -8 ], a_Q12[ 8 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -9 ], a_Q12[ 9 ] );
if( predictLPCOrder == 16 ) {
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -10 ], a_Q12[ 10 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -11 ], a_Q12[ 11 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -12 ], a_Q12[ 12 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -13 ], a_Q12[ 13 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -14 ], a_Q12[ 14 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -15 ], a_Q12[ 15 ] );
}
LPC_pred_Q14 = silk_noise_shape_quantizer_short_prediction(psLPC_Q14, a_Q12, a_Q12_arch, predictLPCOrder, arch);
LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */
/* Noise shape feedback */
silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */
/* Output of lowpass section */
tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 );
tmp2 = silk_SMLAWB( psDD->Diff_Q14, psDD->sAR2_Q14[ 0 ], warping_Q16 );
/* Output of allpass section */
tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 );
psDD->sAR2_Q14[ 0 ] = tmp2;
@ -466,6 +468,19 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
/* Find two quantization level candidates and measure their rate-distortion */
q1_Q10 = silk_SUB32( r_Q10, offset_Q10 );
q1_Q0 = silk_RSHIFT( q1_Q10, 10 );
if (Lambda_Q10 > 2048) {
/* For aggressive RDO, the bias becomes more than one pulse. */
int rdo_offset = Lambda_Q10/2 - 512;
if (q1_Q10 > rdo_offset) {
q1_Q0 = silk_RSHIFT( q1_Q10 - rdo_offset, 10 );
} else if (q1_Q10 < -rdo_offset) {
q1_Q0 = silk_RSHIFT( q1_Q10 + rdo_offset, 10 );
} else if (q1_Q10 < 0) {
q1_Q0 = -1;
} else {
q1_Q0 = 0;
}
}
if( q1_Q0 > 0 ) {
q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 );
@ -519,7 +534,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
/* Update states */
sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 );
psSS[ 0 ].Diff_Q14 = silk_SUB_LSHIFT32( xq_Q14, x_Q10[ i ], 4 );
sLF_AR_shp_Q14 = silk_SUB32( psSS[ 0 ].Diff_Q14, n_AR_Q14 );
psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
psSS[ 0 ].LF_AR_Q14 = sLF_AR_shp_Q14;
psSS[ 0 ].LPC_exc_Q14 = LPC_exc_Q14;
@ -533,21 +549,22 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
exc_Q14 = -exc_Q14;
}
/* Add predictions */
LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 );
xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
/* Update states */
sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 );
psSS[ 1 ].Diff_Q14 = silk_SUB_LSHIFT32( xq_Q14, x_Q10[ i ], 4 );
sLF_AR_shp_Q14 = silk_SUB32( psSS[ 1 ].Diff_Q14, n_AR_Q14 );
psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
psSS[ 1 ].LF_AR_Q14 = sLF_AR_shp_Q14;
psSS[ 1 ].LPC_exc_Q14 = LPC_exc_Q14;
psSS[ 1 ].xq_Q14 = xq_Q14;
}
*smpl_buf_idx = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK; /* Index to newest samples */
last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK; /* Index to decisionDelay old samples */
*smpl_buf_idx = ( *smpl_buf_idx - 1 ) % DECISION_DELAY;
if( *smpl_buf_idx < 0 ) *smpl_buf_idx += DECISION_DELAY;
last_smple_idx = ( *smpl_buf_idx + decisionDelay ) % DECISION_DELAY;
/* Find winner */
RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10;
@ -611,6 +628,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
psDD = &psDelDec[ k ];
psSS = &psSampleState[ k ][ 0 ];
psDD->LF_AR_Q14 = psSS->LF_AR_Q14;
psDD->Diff_Q14 = psSS->Diff_Q14;
psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ] = psSS->xq_Q14;
psDD->Xq_Q14[ *smpl_buf_idx ] = psSS->xq_Q14;
psDD->Q_Q10[ *smpl_buf_idx ] = psSS->Q_Q10;
@ -629,12 +647,13 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
}
RESTORE_STACK;
}
#endif /* OVERRIDE_silk_noise_shape_quantizer_del_dec */
static OPUS_INLINE void silk_nsq_del_dec_scale_states(
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */
const opus_int32 x_Q3[], /* I Input in Q3 */
const opus_int16 x16[], /* I Input */
opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */
const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */
opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */
@ -648,29 +667,19 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states(
)
{
opus_int i, k, lag;
opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23;
opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q26;
NSQ_del_dec_struct *psDD;
lag = pitchL[ subfr ];
inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 );
silk_assert( inv_gain_Q31 != 0 );
/* Calculate gain adjustment factor */
if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) {
gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 );
} else {
gain_adj_Q16 = (opus_int32)1 << 16;
}
/* Scale input */
inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 );
inv_gain_Q26 = silk_RSHIFT_ROUND( inv_gain_Q31, 5 );
for( i = 0; i < psEncC->subfr_length; i++ ) {
x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 );
x_sc_Q10[ i ] = silk_SMULWW( x16[ i ], inv_gain_Q26 );
}
/* Save inverse gain */
NSQ->prev_gain_Q16 = Gains_Q16[ subfr ];
/* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */
if( NSQ->rewhite_flag ) {
if( subfr == 0 ) {
@ -684,7 +693,9 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states(
}
/* Adjust for changing gain */
if( gain_adj_Q16 != (opus_int32)1 << 16 ) {
if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) {
gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 );
/* Scale long-term shaping state */
for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx; i++ ) {
NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] );
@ -702,6 +713,7 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states(
/* Scale scalar states */
psDD->LF_AR_Q14 = silk_SMULWW( gain_adj_Q16, psDD->LF_AR_Q14 );
psDD->Diff_Q14 = silk_SMULWW( gain_adj_Q16, psDD->Diff_Q14 );
/* Scale short-term prediction and shaping states */
for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) {
@ -715,5 +727,8 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states(
psDD->Shape_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Shape_Q14[ i ] );
}
}
/* Save inverse gain */
NSQ->prev_gain_Q16 = Gains_Q16[ subfr ];
}
}

View File

@ -46,7 +46,8 @@ static OPUS_INLINE void silk_PLC_update(
static OPUS_INLINE void silk_PLC_conceal(
silk_decoder_state *psDec, /* I/O Decoder state */
silk_decoder_control *psDecCtrl, /* I/O Decoder control */
opus_int16 frame[] /* O LPC residual signal */
opus_int16 frame[], /* O LPC residual signal */
int arch /* I Run-time architecture */
);
@ -65,7 +66,8 @@ void silk_PLC(
silk_decoder_state *psDec, /* I/O Decoder state */
silk_decoder_control *psDecCtrl, /* I/O Decoder control */
opus_int16 frame[], /* I/O signal */
opus_int lost /* I Loss flag */
opus_int lost, /* I Loss flag */
int arch /* I Run-time architecture */
)
{
/* PLC control function */
@ -78,7 +80,7 @@ void silk_PLC(
/****************************/
/* Generate Signal */
/****************************/
silk_PLC_conceal( psDec, psDecCtrl, frame );
silk_PLC_conceal( psDec, psDecCtrl, frame, arch );
psDec->lossCnt++;
} else {
@ -165,10 +167,35 @@ static OPUS_INLINE void silk_PLC_update(
psPLC->nb_subfr = psDec->nb_subfr;
}
static OPUS_INLINE void silk_PLC_energy(opus_int32 *energy1, opus_int *shift1, opus_int32 *energy2, opus_int *shift2,
const opus_int32 *exc_Q14, const opus_int32 *prevGain_Q10, int subfr_length, int nb_subfr)
{
int i, k;
VARDECL( opus_int16, exc_buf );
opus_int16 *exc_buf_ptr;
SAVE_STACK;
ALLOC( exc_buf, 2*subfr_length, opus_int16 );
/* Find random noise component */
/* Scale previous excitation signal */
exc_buf_ptr = exc_buf;
for( k = 0; k < 2; k++ ) {
for( i = 0; i < subfr_length; i++ ) {
exc_buf_ptr[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT(
silk_SMULWW( exc_Q14[ i + ( k + nb_subfr - 2 ) * subfr_length ], prevGain_Q10[ k ] ), 8 ) );
}
exc_buf_ptr += subfr_length;
}
/* Find the subframe with lowest energy of the last two and use that as random noise generator */
silk_sum_sqr_shift( energy1, shift1, exc_buf, subfr_length );
silk_sum_sqr_shift( energy2, shift2, &exc_buf[ subfr_length ], subfr_length );
RESTORE_STACK;
}
static OPUS_INLINE void silk_PLC_conceal(
silk_decoder_state *psDec, /* I/O Decoder state */
silk_decoder_control *psDecCtrl, /* I/O Decoder control */
opus_int16 frame[] /* O LPC residual signal */
opus_int16 frame[], /* O LPC residual signal */
int arch /* I Run-time architecture */
)
{
opus_int i, j, k;
@ -177,19 +204,26 @@ static OPUS_INLINE void silk_PLC_conceal(
opus_int32 energy1, energy2, *rand_ptr, *pred_lag_ptr;
opus_int32 LPC_pred_Q10, LTP_pred_Q12;
opus_int16 rand_scale_Q14;
opus_int16 *B_Q14, *exc_buf_ptr;
opus_int16 *B_Q14;
opus_int32 *sLPC_Q14_ptr;
VARDECL( opus_int16, exc_buf );
opus_int16 A_Q12[ MAX_LPC_ORDER ];
#ifdef SMALL_FOOTPRINT
opus_int16 *sLTP;
#else
VARDECL( opus_int16, sLTP );
#endif
VARDECL( opus_int32, sLTP_Q14 );
silk_PLC_struct *psPLC = &psDec->sPLC;
opus_int32 prevGain_Q10[2];
SAVE_STACK;
ALLOC( exc_buf, 2*psPLC->subfr_length, opus_int16 );
ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 );
ALLOC( sLTP_Q14, psDec->ltp_mem_length + psDec->frame_length, opus_int32 );
#ifdef SMALL_FOOTPRINT
/* Ugly hack that breaks aliasing rules to save stack: put sLTP at the very end of sLTP_Q14. */
sLTP = ((opus_int16*)&sLTP_Q14[psDec->ltp_mem_length + psDec->frame_length])-psDec->ltp_mem_length;
#else
ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 );
#endif
prevGain_Q10[0] = silk_RSHIFT( psPLC->prevGain_Q16[ 0 ], 6);
prevGain_Q10[1] = silk_RSHIFT( psPLC->prevGain_Q16[ 1 ], 6);
@ -198,19 +232,7 @@ static OPUS_INLINE void silk_PLC_conceal(
silk_memset( psPLC->prevLPC_Q12, 0, sizeof( psPLC->prevLPC_Q12 ) );
}
/* Find random noise component */
/* Scale previous excitation signal */
exc_buf_ptr = exc_buf;
for( k = 0; k < 2; k++ ) {
for( i = 0; i < psPLC->subfr_length; i++ ) {
exc_buf_ptr[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT(
silk_SMULWW( psDec->exc_Q14[ i + ( k + psPLC->nb_subfr - 2 ) * psPLC->subfr_length ], prevGain_Q10[ k ] ), 8 ) );
}
exc_buf_ptr += psPLC->subfr_length;
}
/* Find the subframe with lowest energy of the last two and use that as random noise generator */
silk_sum_sqr_shift( &energy1, &shift1, exc_buf, psPLC->subfr_length );
silk_sum_sqr_shift( &energy2, &shift2, &exc_buf[ psPLC->subfr_length ], psPLC->subfr_length );
silk_PLC_energy(&energy1, &shift1, &energy2, &shift2, psDec->exc_Q14, prevGain_Q10, psDec->subfr_length, psDec->nb_subfr);
if( silk_RSHIFT( energy1, shift2 ) < silk_RSHIFT( energy2, shift1 ) ) {
/* First sub-frame has lowest energy */
@ -270,7 +292,7 @@ static OPUS_INLINE void silk_PLC_conceal(
/* Rewhiten LTP state */
idx = psDec->ltp_mem_length - lag - psDec->LPC_order - LTP_ORDER / 2;
silk_assert( idx > 0 );
silk_LPC_analysis_filter( &sLTP[ idx ], &psDec->outBuf[ idx ], A_Q12, psDec->ltp_mem_length - idx, psDec->LPC_order );
silk_LPC_analysis_filter( &sLTP[ idx ], &psDec->outBuf[ idx ], A_Q12, psDec->ltp_mem_length - idx, psDec->LPC_order, arch );
/* Scale LTP state */
inv_gain_Q30 = silk_INVERSE32_varQ( psPLC->prevGain_Q16[ 1 ], 46 );
inv_gain_Q30 = silk_min( inv_gain_Q30, silk_int32_MAX >> 1 );
@ -306,8 +328,10 @@ static OPUS_INLINE void silk_PLC_conceal(
for( j = 0; j < LTP_ORDER; j++ ) {
B_Q14[ j ] = silk_RSHIFT( silk_SMULBB( harm_Gain_Q15, B_Q14[ j ] ), 15 );
}
/* Gradually reduce excitation gain */
rand_scale_Q14 = silk_RSHIFT( silk_SMULBB( rand_scale_Q14, rand_Gain_Q15 ), 15 );
if ( psDec->indices.signalType != TYPE_NO_VOICE_ACTIVITY ) {
/* Gradually reduce excitation gain */
rand_scale_Q14 = silk_RSHIFT( silk_SMULBB( rand_scale_Q14, rand_Gain_Q15 ), 15 );
}
/* Slowly increase pitch lag */
psPLC->pitchL_Q8 = silk_SMLAWB( psPLC->pitchL_Q8, psPLC->pitchL_Q8, PITCH_DRIFT_FAC_Q16 );
@ -343,7 +367,8 @@ static OPUS_INLINE void silk_PLC_conceal(
}
/* Add prediction to LPC excitation */
sLPC_Q14_ptr[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT32( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], LPC_pred_Q10, 4 );
sLPC_Q14_ptr[ MAX_LPC_ORDER + i ] = silk_ADD_SAT32( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ],
silk_LSHIFT_SAT32( LPC_pred_Q10, 4 ));
/* Scale with Gain */
frame[ i ] = (opus_int16)silk_SAT16( silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], prevGain_Q10[ 1 ] ), 8 ) ) );

View File

@ -48,7 +48,8 @@ void silk_PLC(
silk_decoder_state *psDec, /* I/O Decoder state */
silk_decoder_control *psDecCtrl, /* I/O Decoder control */
opus_int16 frame[], /* I/O signal */
opus_int lost /* I Loss flag */
opus_int lost, /* I Loss flag */
int arch /* I Run-time architecture */
);
void silk_PLC_glue_frames(

View File

@ -35,13 +35,17 @@ extern "C"
/*#define silk_MACRO_COUNT */ /* Used to enable WMOPS counting */
#define SILK_MAX_ORDER_LPC 16 /* max order of the LPC analysis in schur() and k2a() */
#define SILK_MAX_ORDER_LPC 24 /* max order of the LPC analysis in schur() and k2a() */
#include <string.h> /* for memset(), memcpy(), memmove() */
#include "typedef.h"
#include "resampler_structs.h"
#include "macros.h"
#include "cpu_support.h"
#if defined(OPUS_X86_MAY_HAVE_SSE4_1)
#include "x86/SigProc_FIX_sse.h"
#endif
/********************************************************************/
/* SIGNAL PROCESSING FUNCTIONS */
@ -108,7 +112,8 @@ void silk_LPC_analysis_filter(
const opus_int16 *in, /* I Input signal */
const opus_int16 *B, /* I MA prediction coefficients, Q12 [order] */
const opus_int32 len, /* I Signal length */
const opus_int32 d /* I Filter order */
const opus_int32 d, /* I Filter order */
int arch /* I Run-time architecture */
);
/* Chirp (bandwidth expand) LP AR filter */
@ -269,6 +274,15 @@ void silk_NLSF2A(
const opus_int d /* I filter order (should be even) */
);
/* Convert int32 coefficients to int16 coefs and make sure there's no wrap-around */
void silk_LPC_fit(
opus_int16 *a_QOUT, /* O Output signal */
opus_int32 *a_QIN, /* I/O Input signal */
const opus_int QOUT, /* I Input Q domain */
const opus_int QIN, /* I Input Q domain */
const opus_int d /* I Filter order */
);
void silk_insertion_sort_increasing(
opus_int32 *a, /* I/O Unsorted / Sorted vector */
opus_int *idx, /* O Index vector for the sorted elements */
@ -303,7 +317,7 @@ void silk_NLSF_VQ_weights_laroia(
);
/* Compute reflection coefficients from input signal */
void silk_burg_modified(
void silk_burg_modified_c(
opus_int32 *res_nrg, /* O Residual energy */
opus_int *res_nrg_Q, /* O Residual energy Q value */
opus_int32 A_Q16[], /* O Prediction coefficients (length order) */
@ -335,12 +349,15 @@ void silk_scale_vector32_Q26_lshift_18(
/********************************************************************/
/* return sum( inVec1[i] * inVec2[i] ) */
opus_int32 silk_inner_prod_aligned(
const opus_int16 *const inVec1, /* I input vector 1 */
const opus_int16 *const inVec2, /* I input vector 2 */
const opus_int len /* I vector lengths */
const opus_int len, /* I vector lengths */
int arch /* I Run-time architecture */
);
opus_int32 silk_inner_prod_aligned_scale(
const opus_int16 *const inVec1, /* I input vector 1 */
const opus_int16 *const inVec2, /* I input vector 2 */
@ -348,7 +365,7 @@ opus_int32 silk_inner_prod_aligned_scale(
const opus_int len /* I vector lengths */
);
opus_int64 silk_inner_prod16_aligned_64(
opus_int64 silk_inner_prod16_aligned_64_c(
const opus_int16 *inVec1, /* I input vector 1 */
const opus_int16 *inVec2, /* I input vector 2 */
const opus_int len /* I vector lengths */
@ -463,8 +480,7 @@ static OPUS_INLINE opus_int32 silk_ROR32( opus_int32 a32, opus_int rot )
/* Add with saturation for positive input values */
#define silk_ADD_POS_SAT8(a, b) ((((a)+(b)) & 0x80) ? silk_int8_MAX : ((a)+(b)))
#define silk_ADD_POS_SAT16(a, b) ((((a)+(b)) & 0x8000) ? silk_int16_MAX : ((a)+(b)))
#define silk_ADD_POS_SAT32(a, b) ((((a)+(b)) & 0x80000000) ? silk_int32_MAX : ((a)+(b)))
#define silk_ADD_POS_SAT64(a, b) ((((a)+(b)) & 0x8000000000000000LL) ? silk_int64_MAX : ((a)+(b)))
#define silk_ADD_POS_SAT32(a, b) ((((opus_uint32)(a)+(opus_uint32)(b)) & 0x80000000) ? silk_int32_MAX : ((a)+(b)))
#define silk_LSHIFT8(a, shift) ((opus_int8)((opus_uint8)(a)<<(shift))) /* shift >= 0, shift < 8 */
#define silk_LSHIFT16(a, shift) ((opus_int16)((opus_uint16)(a)<<(shift))) /* shift >= 0, shift < 16 */
@ -575,6 +591,14 @@ static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
/* the following seems faster on x86 */
#define silk_SMMUL(a32, b32) (opus_int32)silk_RSHIFT64(silk_SMULL((a32), (b32)), 32)
#if !defined(OPUS_X86_MAY_HAVE_SSE4_1)
#define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \
((void)(arch), silk_burg_modified_c(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch))
#define silk_inner_prod16_aligned_64(inVec1, inVec2, len, arch) \
((void)(arch),silk_inner_prod16_aligned_64_c(inVec1, inVec2, len))
#endif
#include "Inlines.h"
#include "MacroCount.h"
#include "MacroDebug.h"
@ -587,6 +611,11 @@ static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
#include "arm/SigProc_FIX_armv5e.h"
#endif
#if defined(MIPSr1_ASM)
#include "mips/sigproc_fix_mipsr1.h"
#endif
#ifdef __cplusplus
}
#endif

Some files were not shown because too many files have changed in this diff Show More