merge official v7.8.1

This commit is contained in:
luvletter2333 2021-07-16 13:32:47 +08:00
commit 17dd08d9ba
No known key found for this signature in database
GPG Key ID: BFD68B892BECC1D8
428 changed files with 138039 additions and 2353 deletions

View File

@ -3,15 +3,15 @@ import cn.hutool.core.util.RuntimeUtil
apply plugin: "com.android.application" apply plugin: "com.android.application"
apply plugin: "kotlin-android" apply plugin: "kotlin-android"
def verName = "7.8.0-1" def verName = "7.8.1-preview01"
def verCode = 340 def verCode = 345
if (System.getenv("DEBUG_BUILD") == "true") { if (System.getenv("DEBUG_BUILD") == "true") {
verName += "-" + RuntimeUtil.execForStr("git log --pretty=format:'%h' -n 1") verName += "-" + RuntimeUtil.execForStr("git log --pretty=format:'%h' -n 1")
} }
def officialVer = "7.8.0" def officialVer = "7.8.1"
def officialCode = 2360 def officialCode = 2372
def serviceAccountCredentialsFile = rootProject.file("service_account_credentials.json") def serviceAccountCredentialsFile = rootProject.file("service_account_credentials.json")
@ -390,7 +390,7 @@ def playCoreVersion = "1.10.0"
dependencies { dependencies {
implementation "androidx.browser:browser:1.3.0" implementation "androidx.browser:browser:1.3.0"
implementation "androidx.core:core-ktx:1.6.0-beta01" implementation "androidx.core:core-ktx:1.6.0"
implementation "androidx.palette:palette-ktx:1.0.0" implementation "androidx.palette:palette-ktx:1.0.0"
implementation "androidx.viewpager:viewpager:1.0.0" implementation "androidx.viewpager:viewpager:1.0.0"
implementation "androidx.exifinterface:exifinterface:1.3.2" implementation "androidx.exifinterface:exifinterface:1.3.2"

View File

@ -718,7 +718,7 @@ target_include_directories(${NATIVE_LIB} PUBLIC
lz4) lz4)
target_link_libraries(${NATIVE_LIB} target_link_libraries(${NATIVE_LIB}
-Wl,--whole-archive rnnoise voipandroid -Wl,--no-whole-archive -Wl,--whole-archive rnnoise openh264 voipandroid -Wl,--no-whole-archive
tgvoip tgvoip
tgcalls tgcalls
tgcalls_tp tgcalls_tp

View File

@ -1182,7 +1182,7 @@ std::vector<std::pair<float, float>> gatherPositions(std::vector<std::pair<float
return result; return result;
} }
static float *pixelCache = nullptr; thread_local static float *pixelCache = nullptr;
JNIEXPORT void Java_org_telegram_messenger_Utilities_generateGradient(JNIEnv *env, jclass clazz, jobject bitmap, jboolean unpin, jint phase, jfloat progress, jint width, jint height, jint stride, jintArray colors) { JNIEXPORT void Java_org_telegram_messenger_Utilities_generateGradient(JNIEnv *env, jclass clazz, jobject bitmap, jboolean unpin, jint phase, jfloat progress, jint width, jint height, jint stride, jintArray colors) {
if (!bitmap) { if (!bitmap) {
@ -1217,29 +1217,29 @@ JNIEXPORT void Java_org_telegram_messenger_Utilities_generateGradient(JNIEnv *en
std::vector<std::pair<float, float>> current = gatherPositions(positions, phase); std::vector<std::pair<float, float>> current = gatherPositions(positions, phase);
auto colorsArray = (uint8_t *) env->GetIntArrayElements(colors, nullptr); auto colorsArray = (uint8_t *) env->GetIntArrayElements(colors, nullptr);
/*float *newPixelCache = nullptr; float *newPixelCache = nullptr;
if (pixelCache == nullptr) { if (pixelCache == nullptr) {
newPixelCache = new float[width * height * 2]; newPixelCache = new float[width * height * 2];
}*/ }
float directPixelY; float directPixelY;
float centerDistanceY; float centerDistanceY;
float centerDistanceY2; float centerDistanceY2;
int32_t colorsCount = colorsArray[12] == 0 ? 3 : 4; int32_t colorsCount = colorsArray[12] == 0 ? 3 : 4;
for (int y = 0; y < height; y++) { for (int y = 0; y < height; y++) {
//if (pixelCache == nullptr) { if (pixelCache == nullptr) {
directPixelY = (float) y / (float) height; directPixelY = (float) y / (float) height;
centerDistanceY = directPixelY - 0.5f; centerDistanceY = directPixelY - 0.5f;
centerDistanceY2 = centerDistanceY * centerDistanceY; centerDistanceY2 = centerDistanceY * centerDistanceY;
//} }
uint32_t offset = y * stride; uint32_t offset = y * stride;
for (int x = 0; x < width; x++) { for (int x = 0; x < width; x++) {
float pixelX; float pixelX;
float pixelY; float pixelY;
/*if (pixelCache != nullptr) { if (pixelCache != nullptr) {
pixelX = pixelCache[(y * width + x) * 2]; pixelX = pixelCache[(y * width + x) * 2];
pixelX = pixelCache[(y * width + x) * 2 + 1]; pixelY = pixelCache[(y * width + x) * 2 + 1];
} else {*/ } else {
float directPixelX = (float) x / (float) width; float directPixelX = (float) x / (float) width;
float centerDistanceX = directPixelX - 0.5f; float centerDistanceX = directPixelX - 0.5f;
@ -1250,9 +1250,9 @@ JNIEXPORT void Java_org_telegram_messenger_Utilities_generateGradient(JNIEnv *en
float sinTheta = sinf(theta); float sinTheta = sinf(theta);
float cosTheta = cosf(theta); float cosTheta = cosf(theta);
pixelX = /*newPixelCache[(y * width + x) * 2] =*/ std::max(0.0f, std::min(1.0f, 0.5f + centerDistanceX * cosTheta - centerDistanceY * sinTheta)); pixelX = newPixelCache[(y * width + x) * 2] = std::max(0.0f, std::min(1.0f, 0.5f + centerDistanceX * cosTheta - centerDistanceY * sinTheta));
pixelY = /*newPixelCache[(y * width + x) * 2 + 1] =*/ std::max(0.0f, std::min(1.0f, 0.5f + centerDistanceX * sinTheta + centerDistanceY * cosTheta)); pixelY = newPixelCache[(y * width + x) * 2 + 1] = std::max(0.0f, std::min(1.0f, 0.5f + centerDistanceX * sinTheta + centerDistanceY * cosTheta));
//} }
float distanceSum = 0.0f; float distanceSum = 0.0f;
@ -1282,10 +1282,10 @@ JNIEXPORT void Java_org_telegram_messenger_Utilities_generateGradient(JNIEnv *en
pixels[offset + x * 4 + 3] = 0xff; pixels[offset + x * 4 + 3] = 0xff;
} }
} }
/*if (newPixelCache != nullptr) { if (newPixelCache != nullptr) {
delete [] pixelCache; delete [] pixelCache;
pixelCache = newPixelCache; pixelCache = newPixelCache;
}*/ }
env->ReleaseIntArrayElements(colors, (jint *) colorsArray, JNI_ABORT); env->ReleaseIntArrayElements(colors, (jint *) colorsArray, JNI_ABORT);

File diff suppressed because it is too large Load Diff

View File

@ -32,27 +32,27 @@ thread_local static SHA256_CTX sha256Ctx;
Datacenter::Datacenter(int32_t instance, uint32_t id) { Datacenter::Datacenter(int32_t instance, uint32_t id) {
instanceNum = instance; instanceNum = instance;
datacenterId = id; datacenterId = id;
for (uint32_t a = 0; a < UPLOAD_CONNECTIONS_COUNT; a++) { for (auto & a : uploadConnection) {
uploadConnection[a] = nullptr; a = nullptr;
} }
for (uint32_t a = 0; a < DOWNLOAD_CONNECTIONS_COUNT; a++) { for (auto & a : downloadConnection) {
downloadConnection[a] = nullptr; a = nullptr;
} }
for (uint32_t a = 0; a < PROXY_CONNECTIONS_COUNT; a++) { for (auto & a : proxyConnection) {
proxyConnection[a] = nullptr; a = nullptr;
} }
} }
Datacenter::Datacenter(int32_t instance, NativeByteBuffer *data) { Datacenter::Datacenter(int32_t instance, NativeByteBuffer *data) {
instanceNum = instance; instanceNum = instance;
for (uint32_t a = 0; a < UPLOAD_CONNECTIONS_COUNT; a++) { for (auto & a : uploadConnection) {
uploadConnection[a] = nullptr; a = nullptr;
} }
for (uint32_t a = 0; a < DOWNLOAD_CONNECTIONS_COUNT; a++) { for (auto & a : downloadConnection) {
downloadConnection[a] = nullptr; a = nullptr;
} }
for (uint32_t a = 0; a < PROXY_CONNECTIONS_COUNT; a++) { for (auto & a : proxyConnection) {
proxyConnection[a] = nullptr; a = nullptr;
} }
uint32_t currentVersion = data->readUint32(nullptr); uint32_t currentVersion = data->readUint32(nullptr);
if (currentVersion >= 2 && currentVersion <= configVersion) { if (currentVersion >= 2 && currentVersion <= configVersion) {
@ -146,7 +146,7 @@ Datacenter::Datacenter(int32_t instance, NativeByteBuffer *data) {
authorized = data->readInt32(nullptr) != 0; authorized = data->readInt32(nullptr) != 0;
len = data->readUint32(nullptr); len = data->readUint32(nullptr);
for (uint32_t a = 0; a < len; a++) { for (uint32_t a = 0; a < len; a++) {
TL_future_salt *salt = new TL_future_salt(); auto salt = new TL_future_salt();
salt->valid_since = data->readInt32(nullptr); salt->valid_since = data->readInt32(nullptr);
salt->valid_until = data->readInt32(nullptr); salt->valid_until = data->readInt32(nullptr);
salt->salt = data->readInt64(nullptr); salt->salt = data->readInt64(nullptr);
@ -155,7 +155,7 @@ Datacenter::Datacenter(int32_t instance, NativeByteBuffer *data) {
if (currentVersion >= 13) { if (currentVersion >= 13) {
len = data->readUint32(nullptr); len = data->readUint32(nullptr);
for (uint32_t a = 0; a < len; a++) { for (uint32_t a = 0; a < len; a++) {
TL_future_salt *salt = new TL_future_salt(); auto salt = new TL_future_salt();
salt->valid_since = data->readInt32(nullptr); salt->valid_since = data->readInt32(nullptr);
salt->valid_until = data->readInt32(nullptr); salt->valid_until = data->readInt32(nullptr);
salt->salt = data->readInt64(nullptr); salt->salt = data->readInt64(nullptr);
@ -223,9 +223,9 @@ TcpAddress *Datacenter::getCurrentAddress(uint32_t flags) {
return nullptr; return nullptr;
} }
if ((flags & TcpAddressFlagStatic) != 0) { if ((flags & TcpAddressFlagStatic) != 0) {
for (std::vector<TcpAddress>::iterator iter = addresses->begin(); iter != addresses->end(); iter++) { for (auto & addresse : *addresses) {
if ((iter->flags & TcpAddressFlagStatic) != 0) { if ((addresse.flags & TcpAddressFlagStatic) != 0) {
return &(*iter); return &addresse;
} }
} }
} }
@ -288,8 +288,8 @@ int32_t Datacenter::getCurrentPort(uint32_t flags) {
if ((flags & TcpAddressFlagStatic) != 0) { if ((flags & TcpAddressFlagStatic) != 0) {
uint32_t num = 0; uint32_t num = 0;
for (std::vector<TcpAddress>::iterator iter = addresses->begin(); iter != addresses->end(); iter++) { for (auto & addresse : *addresses) {
if ((iter->flags & TcpAddressFlagStatic) != 0) { if ((addresse.flags & TcpAddressFlagStatic) != 0) {
currentAddressNum = num; currentAddressNum = num;
break; break;
} }
@ -362,8 +362,8 @@ void Datacenter::addAddressAndPort(std::string address, uint32_t port, uint32_t
addresses = &addressesIpv4; addresses = &addressesIpv4;
} }
} }
for (std::vector<TcpAddress>::iterator iter = addresses->begin(); iter != addresses->end(); iter++) { for (auto & addresse : *addresses) {
if (iter->address == address && iter->port == port) { if (addresse.address == address && addresse.port == port) {
return; return;
} }
} }
@ -749,14 +749,14 @@ void Datacenter::suspendConnections(bool suspendPush) {
if (tempConnection != nullptr) { if (tempConnection != nullptr) {
tempConnection->suspendConnection(); tempConnection->suspendConnection();
} }
for (uint32_t a = 0; a < UPLOAD_CONNECTIONS_COUNT; a++) { for (auto & a : uploadConnection) {
if (uploadConnection[a] != nullptr) { if (a != nullptr) {
uploadConnection[a]->suspendConnection(); a->suspendConnection();
} }
} }
for (uint32_t a = 0; a < DOWNLOAD_CONNECTIONS_COUNT; a++) { for (auto & a : downloadConnection) {
if (downloadConnection[a] != nullptr) { if (a != nullptr) {
downloadConnection[a]->suspendConnection(); a->suspendConnection();
} }
} }
} }
@ -771,19 +771,19 @@ void Datacenter::getSessions(std::vector<int64_t> &sessions) {
if (tempConnection != nullptr) { if (tempConnection != nullptr) {
sessions.push_back(tempConnection->getSessionId()); sessions.push_back(tempConnection->getSessionId());
} }
for (uint32_t a = 0; a < UPLOAD_CONNECTIONS_COUNT; a++) { for (auto & a : uploadConnection) {
if (uploadConnection[a] != nullptr) { if (a != nullptr) {
sessions.push_back(uploadConnection[a]->getSessionId()); sessions.push_back(a->getSessionId());
} }
} }
for (uint32_t a = 0; a < DOWNLOAD_CONNECTIONS_COUNT; a++) { for (auto & a : downloadConnection) {
if (downloadConnection[a] != nullptr) { if (a != nullptr) {
sessions.push_back(downloadConnection[a]->getSessionId()); sessions.push_back(a->getSessionId());
} }
} }
for (uint32_t a = 0; a < PROXY_CONNECTIONS_COUNT; a++) { for (auto & a : proxyConnection) {
if (proxyConnection[a] != nullptr) { if (a != nullptr) {
sessions.push_back(proxyConnection[a]->getSessionId()); sessions.push_back(a->getSessionId());
} }
} }
} }
@ -796,21 +796,21 @@ void Datacenter::recreateSessions(HandshakeType type) {
if (tempConnection != nullptr) { if (tempConnection != nullptr) {
tempConnection->recreateSession(); tempConnection->recreateSession();
} }
for (uint32_t a = 0; a < UPLOAD_CONNECTIONS_COUNT; a++) { for (auto & a : uploadConnection) {
if (uploadConnection[a] != nullptr) { if (a != nullptr) {
uploadConnection[a]->recreateSession(); a->recreateSession();
} }
} }
for (uint32_t a = 0; a < PROXY_CONNECTIONS_COUNT; a++) { for (auto & a : proxyConnection) {
if (proxyConnection[a] != nullptr) { if (a != nullptr) {
proxyConnection[a]->recreateSession(); a->recreateSession();
} }
} }
} }
if (type == HandshakeTypeAll || type == HandshakeTypeMediaTemp || type == HandshakeTypePerm) { if (type == HandshakeTypeAll || type == HandshakeTypeMediaTemp || type == HandshakeTypePerm) {
for (uint32_t a = 0; a < DOWNLOAD_CONNECTIONS_COUNT; a++) { for (auto & a : downloadConnection) {
if (downloadConnection[a] != nullptr) { if (a != nullptr) {
downloadConnection[a]->recreateSession(); a->recreateSession();
} }
} }
if (genericMediaConnection != nullptr) { if (genericMediaConnection != nullptr) {
@ -883,8 +883,8 @@ bool Datacenter::isHandshaking(bool media) {
if (media && (isCdnDatacenter || !PFS_ENABLED)) { if (media && (isCdnDatacenter || !PFS_ENABLED)) {
media = false; media = false;
} }
for (std::vector<std::unique_ptr<Handshake>>::iterator iter = handshakes.begin(); iter != handshakes.end(); iter++) { for (auto & iter : handshakes) {
Handshake *handshake = iter->get(); Handshake *handshake = iter.get();
if (handshake->getType() == HandshakeTypePerm || (media && handshake->getType() == HandshakeTypeMediaTemp) || (!media && handshake->getType() != HandshakeTypeMediaTemp)) { if (handshake->getType() == HandshakeTypePerm || (media && handshake->getType() == HandshakeTypeMediaTemp) || (!media && handshake->getType() != HandshakeTypeMediaTemp)) {
return true; return true;
} }
@ -896,8 +896,8 @@ bool Datacenter::isHandshaking(HandshakeType type) {
if (handshakes.empty()) { if (handshakes.empty()) {
return false; return false;
} }
for (std::vector<std::unique_ptr<Handshake>>::iterator iter = handshakes.begin(); iter != handshakes.end(); iter++) { for (auto & iter : handshakes) {
Handshake *handshake = iter->get(); Handshake *handshake = iter.get();
if (handshake->getType() == type) { if (handshake->getType() == type) {
return true; return true;
} }
@ -907,28 +907,28 @@ bool Datacenter::isHandshaking(HandshakeType type) {
void Datacenter::beginHandshake(HandshakeType handshakeType, bool reconnect) { void Datacenter::beginHandshake(HandshakeType handshakeType, bool reconnect) {
if (handshakeType == HandshakeTypeCurrent) { if (handshakeType == HandshakeTypeCurrent) {
for (std::vector<std::unique_ptr<Handshake>>::iterator iter = handshakes.begin(); iter != handshakes.end(); iter++) { for (auto & iter : handshakes) {
Handshake *handshake = iter->get(); Handshake *handshake = iter.get();
handshake->beginHandshake(reconnect); handshake->beginHandshake(reconnect);
} }
} else { } else {
if (authKeyPerm == nullptr) { if (authKeyPerm == nullptr) {
if (!isHandshaking(HandshakeTypePerm)) { if (!isHandshaking(HandshakeTypePerm)) {
Handshake *handshake = new Handshake(this, HandshakeTypePerm, this); auto handshake = new Handshake(this, HandshakeTypePerm, this);
handshakes.push_back(std::unique_ptr<Handshake>(handshake)); handshakes.push_back(std::unique_ptr<Handshake>(handshake));
handshake->beginHandshake(reconnect); handshake->beginHandshake(reconnect);
} }
} else if (PFS_ENABLED) { } else if (PFS_ENABLED) {
if (handshakeType == HandshakeTypeAll || handshakeType == HandshakeTypeTemp) { if (handshakeType == HandshakeTypeAll || handshakeType == HandshakeTypeTemp) {
if (!isHandshaking(HandshakeTypeTemp)) { if (!isHandshaking(HandshakeTypeTemp)) {
Handshake *handshake = new Handshake(this, HandshakeTypeTemp, this); auto handshake = new Handshake(this, HandshakeTypeTemp, this);
handshakes.push_back(std::unique_ptr<Handshake>(handshake)); handshakes.push_back(std::unique_ptr<Handshake>(handshake));
handshake->beginHandshake(reconnect); handshake->beginHandshake(reconnect);
} }
} }
if ((handshakeType == HandshakeTypeAll || handshakeType == HandshakeTypeMediaTemp) && hasMediaAddress()) { if ((handshakeType == HandshakeTypeAll || handshakeType == HandshakeTypeMediaTemp) && hasMediaAddress()) {
if (!isHandshaking(HandshakeTypeMediaTemp)) { if (!isHandshaking(HandshakeTypeMediaTemp)) {
Handshake *handshake = new Handshake(this, HandshakeTypeMediaTemp, this); auto handshake = new Handshake(this, HandshakeTypeMediaTemp, this);
handshakes.push_back(std::unique_ptr<Handshake>(handshake)); handshakes.push_back(std::unique_ptr<Handshake>(handshake));
handshake->beginHandshake(reconnect); handshake->beginHandshake(reconnect);
} }
@ -942,9 +942,9 @@ void Datacenter::onHandshakeConnectionClosed(Connection *connection) {
return; return;
} }
bool media = connection->getConnectionType() == ConnectionTypeGenericMedia; bool media = connection->getConnectionType() == ConnectionTypeGenericMedia;
for (std::vector<std::unique_ptr<Handshake>>::iterator iter = handshakes.begin(); iter != handshakes.end(); iter++) { for (auto & iter : handshakes) {
Handshake *handshake = iter->get(); Handshake *handshake = iter.get();
if (media && handshake->getType() == HandshakeTypeMediaTemp || !media && handshake->getType() != HandshakeTypeMediaTemp) { if ((media && handshake->getType() == HandshakeTypeMediaTemp) || (!media && handshake->getType() != HandshakeTypeMediaTemp)) {
handshake->onHandshakeConnectionClosed(); handshake->onHandshakeConnectionClosed();
} }
} }
@ -955,9 +955,9 @@ void Datacenter::onHandshakeConnectionConnected(Connection *connection) {
return; return;
} }
bool media = connection->getConnectionType() == ConnectionTypeGenericMedia; bool media = connection->getConnectionType() == ConnectionTypeGenericMedia;
for (std::vector<std::unique_ptr<Handshake>>::iterator iter = handshakes.begin(); iter != handshakes.end(); iter++) { for (auto & iter : handshakes) {
Handshake *handshake = iter->get(); Handshake *handshake = iter.get();
if (media && handshake->getType() == HandshakeTypeMediaTemp || !media && handshake->getType() != HandshakeTypeMediaTemp) { if ((media && handshake->getType() == HandshakeTypeMediaTemp) || (!media && handshake->getType() != HandshakeTypeMediaTemp)) {
handshake->onHandshakeConnectionConnected(); handshake->onHandshakeConnectionConnected();
} }
} }
@ -986,9 +986,9 @@ void Datacenter::processHandshakeResponse(bool media, TLObject *message, int64_t
if (handshakes.empty()) { if (handshakes.empty()) {
return; return;
} }
for (std::vector<std::unique_ptr<Handshake>>::iterator iter = handshakes.begin(); iter != handshakes.end(); iter++) { for (auto & iter : handshakes) {
Handshake *handshake = iter->get(); Handshake *handshake = iter.get();
if (media && handshake->getType() == HandshakeTypeMediaTemp || !media && handshake->getType() != HandshakeTypeMediaTemp) { if ((media && handshake->getType() == HandshakeTypeMediaTemp) || (!media && handshake->getType() != HandshakeTypeMediaTemp)) {
handshake->processHandshakeResponse(message, messageId); handshake->processHandshakeResponse(message, messageId);
} }
} }
@ -998,9 +998,9 @@ TLObject *Datacenter::getCurrentHandshakeRequest(bool media) {
if (handshakes.empty()) { if (handshakes.empty()) {
return nullptr; return nullptr;
} }
for (std::vector<std::unique_ptr<Handshake>>::iterator iter = handshakes.begin(); iter != handshakes.end(); iter++) { for (auto & iter : handshakes) {
Handshake *handshake = iter->get(); Handshake *handshake = iter.get();
if (media && handshake->getType() == HandshakeTypeMediaTemp || !media && handshake->getType() != HandshakeTypeMediaTemp) { if ((media && handshake->getType() == HandshakeTypeMediaTemp) || (!media && handshake->getType() != HandshakeTypeMediaTemp)) {
return handshake->getCurrentHandshakeRequest(); return handshake->getCurrentHandshakeRequest();
} }
} }
@ -1069,9 +1069,9 @@ ByteArray *Datacenter::getAuthKey(ConnectionType connectionType, bool perm, int6
bool media = Connection::isMediaConnectionType(connectionType) && hasMediaAddress(); bool media = Connection::isMediaConnectionType(connectionType) && hasMediaAddress();
ByteArray *authKeyPending = nullptr; ByteArray *authKeyPending = nullptr;
int64_t authKeyPendingId = 0; int64_t authKeyPendingId = 0;
for (std::vector<std::unique_ptr<Handshake>>::iterator iter = handshakes.begin(); iter != handshakes.end(); iter++) { for (auto & iter : handshakes) {
Handshake *handshake = iter->get(); Handshake *handshake = iter.get();
if (media && handshake->getType() == HandshakeTypeMediaTemp || !media && handshake->getType() == HandshakeTypeTemp) { if ((media && handshake->getType() == HandshakeTypeMediaTemp) || (!media && handshake->getType() == HandshakeTypeTemp)) {
authKeyPending = handshake->getPendingAuthKey(); authKeyPending = handshake->getPendingAuthKey();
authKeyPendingId = handshake->getPendingAuthKeyId(); authKeyPendingId = handshake->getPendingAuthKeyId();
break; break;
@ -1118,12 +1118,12 @@ NativeByteBuffer *Datacenter::createRequestsData(std::vector<std::unique_ptr<Net
} }
if (LOGS_ENABLED) DEBUG_D("connection(%p, account%u, dc%u, type %d) send message (session: 0x%" PRIx64 ", seqno: %d, messageid: 0x%" PRIx64 "): %s(%p)", connection, instanceNum, datacenterId, connection->getConnectionType(), (uint64_t) connection->getSessionId(), networkMessage->message->seqno, (uint64_t) networkMessage->message->msg_id, typeid(*messageBody).name(), messageBody); if (LOGS_ENABLED) DEBUG_D("connection(%p, account%u, dc%u, type %d) send message (session: 0x%" PRIx64 ", seqno: %d, messageid: 0x%" PRIx64 "): %s(%p)", connection, instanceNum, datacenterId, connection->getConnectionType(), (uint64_t) connection->getSessionId(), networkMessage->message->seqno, (uint64_t) networkMessage->message->msg_id, typeid(*messageBody).name(), messageBody);
int64_t messageTime = (int64_t) (networkMessage->message->msg_id / 4294967296.0 * 1000); auto messageTime = (int64_t) (networkMessage->message->msg_id / 4294967296.0 * 1000);
int64_t currentTime = ConnectionsManager::getInstance(instanceNum).getCurrentTimeMillis() + (int64_t) ConnectionsManager::getInstance(instanceNum).getTimeDifference() * 1000; int64_t currentTime = ConnectionsManager::getInstance(instanceNum).getCurrentTimeMillis() + (int64_t) ConnectionsManager::getInstance(instanceNum).getTimeDifference() * 1000;
if (!pfsInit && (messageTime < currentTime - 30000 || messageTime > currentTime + 25000)) { if (!pfsInit && (networkMessage->forceContainer || messageTime < currentTime - 30000 || messageTime > currentTime + 25000)) {
if (LOGS_ENABLED) DEBUG_D("wrap message in container"); if (LOGS_ENABLED) DEBUG_D("wrap message in container");
TL_msg_container *messageContainer = new TL_msg_container(); auto messageContainer = new TL_msg_container();
messageContainer->messages.push_back(std::move(networkMessage->message)); messageContainer->messages.push_back(std::move(networkMessage->message));
messageId = ConnectionsManager::getInstance(instanceNum).generateMessageId(); messageId = ConnectionsManager::getInstance(instanceNum).generateMessageId();
@ -1136,7 +1136,7 @@ NativeByteBuffer *Datacenter::createRequestsData(std::vector<std::unique_ptr<Net
} }
} else { } else {
if (LOGS_ENABLED) DEBUG_D("start write messages to container"); if (LOGS_ENABLED) DEBUG_D("start write messages to container");
TL_msg_container *messageContainer = new TL_msg_container(); auto messageContainer = new TL_msg_container();
size_t count = requests.size(); size_t count = requests.size();
for (uint32_t a = 0; a < count; a++) { for (uint32_t a = 0; a < count; a++) {
NetworkMessage *networkMessage = requests[a].get(); NetworkMessage *networkMessage = requests[a].get();
@ -1400,7 +1400,7 @@ Connection *Datacenter::getConnectionByType(uint32_t connectionType, bool create
void Datacenter::onHandshakeComplete(Handshake *handshake, int64_t keyId, ByteArray *authKey, int32_t timeDifference) { void Datacenter::onHandshakeComplete(Handshake *handshake, int64_t keyId, ByteArray *authKey, int32_t timeDifference) {
HandshakeType type = handshake->getType(); HandshakeType type = handshake->getType();
for (std::vector<std::unique_ptr<Handshake>>::iterator iter = handshakes.begin(); iter != handshakes.end(); iter++) { for (auto iter = handshakes.begin(); iter != handshakes.end(); iter++) {
if (iter->get() == handshake) { if (iter->get() == handshake) {
handshakes.erase(iter); handshakes.erase(iter);
if (type == HandshakeTypePerm) { if (type == HandshakeTypePerm) {
@ -1431,13 +1431,13 @@ void Datacenter::exportAuthorization() {
return; return;
} }
exportingAuthorization = true; exportingAuthorization = true;
TL_auth_exportAuthorization *request = new TL_auth_exportAuthorization(); auto request = new TL_auth_exportAuthorization();
request->dc_id = datacenterId; request->dc_id = datacenterId;
if (LOGS_ENABLED) DEBUG_D("dc%u begin export authorization", datacenterId); if (LOGS_ENABLED) DEBUG_D("dc%u begin export authorization", datacenterId);
ConnectionsManager::getInstance(instanceNum).sendRequest(request, [&](TLObject *response, TL_error *error, int32_t networkType, int64_t responseTime) { ConnectionsManager::getInstance(instanceNum).sendRequest(request, [&](TLObject *response, TL_error *error, int32_t networkType, int64_t responseTime) {
if (error == nullptr) { if (error == nullptr) {
TL_auth_exportedAuthorization *res = (TL_auth_exportedAuthorization *) response; auto res = (TL_auth_exportedAuthorization *) response;
TL_auth_importAuthorization *request2 = new TL_auth_importAuthorization(); auto request2 = new TL_auth_importAuthorization();
request2->bytes = std::move(res->bytes); request2->bytes = std::move(res->bytes);
request2->id = res->id; request2->id = res->id;
if (LOGS_ENABLED) DEBUG_D("dc%u begin import authorization", datacenterId); if (LOGS_ENABLED) DEBUG_D("dc%u begin import authorization", datacenterId);
@ -1497,7 +1497,7 @@ TL_help_configSimple *Datacenter::decodeSimpleConfig(NativeByteBuffer *buffer) {
BIO *keyBio = BIO_new(BIO_s_mem()); BIO *keyBio = BIO_new(BIO_s_mem());
BIO_write(keyBio, public_key.c_str(), (int) public_key.length()); BIO_write(keyBio, public_key.c_str(), (int) public_key.length());
RSA *rsaKey = PEM_read_bio_RSAPublicKey(keyBio, NULL, NULL, NULL); RSA *rsaKey = PEM_read_bio_RSAPublicKey(keyBio, nullptr, nullptr, nullptr);
if (rsaKey == nullptr) { if (rsaKey == nullptr) {
if (rsaKey == nullptr) { if (rsaKey == nullptr) {
if (LOGS_ENABLED) DEBUG_E("Invalid rsa public key"); if (LOGS_ENABLED) DEBUG_E("Invalid rsa public key");

View File

@ -19,7 +19,6 @@
#define USE_DEBUG_SESSION false #define USE_DEBUG_SESSION false
#define READ_BUFFER_SIZE 1024 * 128 #define READ_BUFFER_SIZE 1024 * 128
//#define DEBUG_VERSION //#define DEBUG_VERSION
#define USE_OLD_KEYS
#define PFS_ENABLED 1 #define PFS_ENABLED 1
#define DEFAULT_DATACENTER_ID INT_MAX #define DEFAULT_DATACENTER_ID INT_MAX
#define DC_UPDATE_TIME 60 * 60 #define DC_UPDATE_TIME 60 * 60
@ -64,6 +63,7 @@ typedef struct NetworkMessage {
std::unique_ptr<TL_message> message; std::unique_ptr<TL_message> message;
bool invokeAfter = false; bool invokeAfter = false;
bool needQuickAck = false; bool needQuickAck = false;
bool forceContainer = false;
int32_t requestId; int32_t requestId;
} NetworkMessage; } NetworkMessage;
@ -172,7 +172,8 @@ enum RequestFlag {
RequestFlagForceDownload = 32, RequestFlagForceDownload = 32,
RequestFlagInvokeAfter = 64, RequestFlagInvokeAfter = 64,
RequestFlagNeedQuickAck = 128, RequestFlagNeedQuickAck = 128,
RequestFlagUseUnboundKey = 256 RequestFlagUseUnboundKey = 256,
RequestFlagResendAfter = 512
}; };
inline std::string to_string_int32(int32_t value) { inline std::string to_string_int32(int32_t value) {

View File

@ -8,6 +8,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <algorithm> #include <algorithm>
#include <memory>
#include <openssl/rand.h> #include <openssl/rand.h>
#include <openssl/sha.h> #include <openssl/sha.h>
#include <openssl/bn.h> #include <openssl/bn.h>
@ -55,19 +56,11 @@ void Handshake::beginHandshake(bool reconnect) {
connection->connect(); connection->connect();
} }
#ifdef USE_OLD_KEYS auto request = new TL_req_pq_multi();
TL_req_pq *request = new TL_req_pq(); request->nonce = std::make_unique<ByteArray>(16);
request->nonce = std::unique_ptr<ByteArray>(new ByteArray(16));
RAND_bytes(request->nonce->bytes, 16); RAND_bytes(request->nonce->bytes, 16);
authNonce = new ByteArray(request->nonce.get()); authNonce = new ByteArray(request->nonce.get());
sendRequestData(request, true); sendRequestData(request, true);
#else
TL_req_pq_multi *request = new TL_req_pq_multi();
request->nonce = std::unique_ptr<ByteArray>(new ByteArray(16));
RAND_bytes(request->nonce->bytes, 16);
authNonce = new ByteArray(request->nonce.get());
sendRequestData(request, true);
#endif
} }
void Handshake::cleanupHandshake() { void Handshake::cleanupHandshake() {
@ -335,14 +328,14 @@ void Handshake::processHandshakeResponse(TLObject *message, int64_t messageId) {
} }
handshakeState = 2; handshakeState = 2;
TL_resPQ *result = (TL_resPQ *) message; auto result = (TL_resPQ *) message;
if (authNonce->isEqualTo(result->nonce.get())) { if (authNonce->isEqualTo(result->nonce.get())) {
std::string key; std::string key = "";
int64_t keyFingerprint = 0; int64_t keyFingerprint = 0;
size_t count1 = result->server_public_key_fingerprints.size(); size_t count1 = result->server_public_key_fingerprints.size();
if (currentDatacenter->isCdnDatacenter) { if (currentDatacenter->isCdnDatacenter) {
std::map<int32_t, uint64_t>::iterator iter = cdnPublicKeysFingerprints.find(currentDatacenter->datacenterId); auto iter = cdnPublicKeysFingerprints.find(currentDatacenter->datacenterId);
if (iter != cdnPublicKeysFingerprints.end()) { if (iter != cdnPublicKeysFingerprints.end()) {
for (uint32_t a = 0; a < count1; a++) { for (uint32_t a = 0; a < count1; a++) {
if ((uint64_t) result->server_public_key_fingerprints[a] == iter->second) { if ((uint64_t) result->server_public_key_fingerprints[a] == iter->second) {
@ -353,95 +346,35 @@ void Handshake::processHandshakeResponse(TLObject *message, int64_t messageId) {
} }
} else { } else {
if (serverPublicKeys.empty()) { if (serverPublicKeys.empty()) {
#ifdef USE_OLD_KEYS if (ConnectionsManager::getInstance(currentDatacenter->instanceNum).testBackend) {
serverPublicKeys.push_back("-----BEGIN RSA PUBLIC KEY-----\n" serverPublicKeys.emplace_back("-----BEGIN RSA PUBLIC KEY-----\n"
"MIIBCgKCAQEAwVACPi9w23mF3tBkdZz+zwrzKOaaQdr01vAbU4E1pvkfj4sqDsm6\n" "MIIBCgKCAQEAyMEdY1aR+sCR3ZSJrtztKTKqigvO/vBfqACJLZtS7QMgCGXJ6XIR\n"
"lyDONS789sVoD/xCS9Y0hkkC3gtL1tSfTlgCMOOul9lcixlEKzwKENj1Yz/s7daS\n" "yy7mx66W0/sOFa7/1mAZtEoIokDP3ShoqF4fVNb6XeqgQfaUHd8wJpDWHcR2OFwv\n"
"an9tqw3bfUV/nqgbhGX81v/+7RFAEd+RwFnK7a+XYl9sluzHRyVVaTTveB2GazTw\n" "plUUI1PLTktZ9uW2WE23b+ixNwJjJGwBDJPQEQFBE+vfmH0JP503wr5INS1poWg/\n"
"Efzk2DWgkBluml8OREmvfraX3bkHZJTKX4EQSjBbbdJ2ZXIsRrYOXfaA+xayEGB+\n" "j25sIWeYPHYeOrFp/eXaqhISP6G+q2IeTaWTXpwZj4LzXq5YOpk4bYEQ6mvRq7D1\n"
"8hdlLmAjbCVfaigxX0CDqWeR1yFL9kwd9P0NsZRPsmoqVwMbMu7mStFai6aIhc3n\n" "aHWfYmlEGepfaYR8Q0YqvvhYtMte3ITnuSJs171+GDqpdKcSwHnd6FudwGO4pcCO\n"
"Slv8kg9qv1m6XHVQY3PnEw+QQtqSIXklHwIDAQAB\n" "j4WcDuXc2CTHgH8gFTNhp/Y8/SpDOhvn9QIDAQAB\n"
"-----END RSA PUBLIC KEY-----"); "-----END RSA PUBLIC KEY-----");
serverPublicKeysFingerprints.push_back(0xc3b42b026ce86b21LL); serverPublicKeysFingerprints.push_back(0xb25898df208d2603);
} else {
serverPublicKeys.push_back("-----BEGIN RSA PUBLIC KEY-----\n" serverPublicKeys.emplace_back("-----BEGIN RSA PUBLIC KEY-----\n"
"MIIBCgKCAQEAxq7aeLAqJR20tkQQMfRn+ocfrtMlJsQ2Uksfs7Xcoo77jAid0bRt\n" "MIIBCgKCAQEA6LszBcC1LGzyr992NzE0ieY+BSaOW622Aa9Bd4ZHLl+TuFQ4lo4g\n"
"ksiVmT2HEIJUlRxfABoPBV8wY9zRTUMaMA654pUX41mhyVN+XoerGxFvrs9dF1Ru\n" "5nKaMBwK/BIb9xUfg0Q29/2mgIR6Zr9krM7HjuIcCzFvDtr+L0GQjae9H0pRB2OO\n"
"vCHbI02dM2ppPvyytvvMoefRoL5BTcpAihFgm5xCaakgsJ/tH5oVl74CdhQw8J5L\n" "62cECs5HKhT5DZ98K33vmWiLowc621dQuwKWSQKjWf50XYFw42h21P2KXUGyp2y/\n"
"xI/K++KJBUyZ26Uba1632cOiq05JBUW0Z2vWIOk4BLysk7+U9z+SxynKiZR3/xdi\n" "+aEyZ+uVgLLQbRA1dEjSDZ2iGRy12Mk5gpYc397aYp438fsJoHIgJ2lgMv5h7WY9\n"
"XvFKk01R3BHV+GUKM2RYazpS/P8v7eyKhAbKxOdRcFpHLlVwfjyM1VlDQrEZxsMp\n" "t6N/byY9Nw9p21Og3AoXSL2q/2IJ1WRUhebgAdGVMlV1fkuOQoEzR7EdpqtQD9Cs\n"
"NTLYXb6Sce1Uov0YtNx5wEowlREH1WOTlwIDAQAB\n" "5+bfo3Nhmcyvk5ftB0WkJ9z6bNZ7yxrP8wIDAQAB\n"
"-----END RSA PUBLIC KEY-----"); "-----END RSA PUBLIC KEY-----");
serverPublicKeysFingerprints.push_back(0x9a996a1db11c729bLL); serverPublicKeysFingerprints.push_back(0xd09d1d85de64fd85);
}
serverPublicKeys.push_back("-----BEGIN RSA PUBLIC KEY-----\n"
"MIIBCgKCAQEAsQZnSWVZNfClk29RcDTJQ76n8zZaiTGuUsi8sUhW8AS4PSbPKDm+\n"
"DyJgdHDWdIF3HBzl7DHeFrILuqTs0vfS7Pa2NW8nUBwiaYQmPtwEa4n7bTmBVGsB\n"
"1700/tz8wQWOLUlL2nMv+BPlDhxq4kmJCyJfgrIrHlX8sGPcPA4Y6Rwo0MSqYn3s\n"
"g1Pu5gOKlaT9HKmE6wn5Sut6IiBjWozrRQ6n5h2RXNtO7O2qCDqjgB2vBxhV7B+z\n"
"hRbLbCmW0tYMDsvPpX5M8fsO05svN+lKtCAuz1leFns8piZpptpSCFn7bWxiA9/f\n"
"x5x17D7pfah3Sy2pA+NDXyzSlGcKdaUmwQIDAQAB\n"
"-----END RSA PUBLIC KEY-----");
serverPublicKeysFingerprints.push_back(0xb05b2a6f70cdea78LL);
serverPublicKeys.push_back("-----BEGIN RSA PUBLIC KEY-----\n"
"MIIBCgKCAQEAwqjFW0pi4reKGbkc9pK83Eunwj/k0G8ZTioMMPbZmW99GivMibwa\n"
"xDM9RDWabEMyUtGoQC2ZcDeLWRK3W8jMP6dnEKAlvLkDLfC4fXYHzFO5KHEqF06i\n"
"qAqBdmI1iBGdQv/OQCBcbXIWCGDY2AsiqLhlGQfPOI7/vvKc188rTriocgUtoTUc\n"
"/n/sIUzkgwTqRyvWYynWARWzQg0I9olLBBC2q5RQJJlnYXZwyTL3y9tdb7zOHkks\n"
"WV9IMQmZmyZh/N7sMbGWQpt4NMchGpPGeJ2e5gHBjDnlIf2p1yZOYeUYrdbwcS0t\n"
"UiggS4UeE8TzIuXFQxw7fzEIlmhIaq3FnwIDAQAB\n"
"-----END RSA PUBLIC KEY-----");
serverPublicKeysFingerprints.push_back(0x71e025b6c76033e3LL);
#endif
serverPublicKeys.push_back("-----BEGIN RSA PUBLIC KEY-----\n"
"MIIBCgKCAQEAruw2yP/BCcsJliRoW5eBVBVle9dtjJw+OYED160Wybum9SXtBBLX\n"
"riwt4rROd9csv0t0OHCaTmRqBcQ0J8fxhN6/cpR1GWgOZRUAiQxoMnlt0R93LCX/\n"
"j1dnVa/gVbCjdSxpbrfY2g2L4frzjJvdl84Kd9ORYjDEAyFnEA7dD556OptgLQQ2\n"
"e2iVNq8NZLYTzLp5YpOdO1doK+ttrltggTCy5SrKeLoCPPbOgGsdxJxyz5KKcZnS\n"
"Lj16yE5HvJQn0CNpRdENvRUXe6tBP78O39oJ8BTHp9oIjd6XWXAsp2CvK45Ol8wF\n"
"XGF710w9lwCGNbmNxNYhtIkdqfsEcwR5JwIDAQAB\n"
"-----END RSA PUBLIC KEY-----");
serverPublicKeysFingerprints.push_back(0xbc35f3509f7b7a5LL);
serverPublicKeys.push_back("-----BEGIN RSA PUBLIC KEY-----\n"
"MIIBCgKCAQEAvfLHfYH2r9R70w8prHblWt/nDkh+XkgpflqQVcnAfSuTtO05lNPs\n"
"pQmL8Y2XjVT4t8cT6xAkdgfmmvnvRPOOKPi0OfJXoRVylFzAQG/j83u5K3kRLbae\n"
"7fLccVhKZhY46lvsueI1hQdLgNV9n1cQ3TDS2pQOCtovG4eDl9wacrXOJTG2990V\n"
"jgnIKNA0UMoP+KF03qzryqIt3oTvZq03DyWdGK+AZjgBLaDKSnC6qD2cFY81UryR\n"
"WOab8zKkWAnhw2kFpcqhI0jdV5QaSCExvnsjVaX0Y1N0870931/5Jb9ICe4nweZ9\n"
"kSDF/gip3kWLG0o8XQpChDfyvsqB9OLV/wIDAQAB\n"
"-----END RSA PUBLIC KEY-----");
serverPublicKeysFingerprints.push_back(0x15ae5fa8b5529542LL);
serverPublicKeys.push_back("-----BEGIN RSA PUBLIC KEY-----\n"
"MIIBCgKCAQEAs/ditzm+mPND6xkhzwFIz6J/968CtkcSE/7Z2qAJiXbmZ3UDJPGr\n"
"zqTDHkO30R8VeRM/Kz2f4nR05GIFiITl4bEjvpy7xqRDspJcCFIOcyXm8abVDhF+\n"
"th6knSU0yLtNKuQVP6voMrnt9MV1X92LGZQLgdHZbPQz0Z5qIpaKhdyA8DEvWWvS\n"
"Uwwc+yi1/gGaybwlzZwqXYoPOhwMebzKUk0xW14htcJrRrq+PXXQbRzTMynseCoP\n"
"Ioke0dtCodbA3qQxQovE16q9zz4Otv2k4j63cz53J+mhkVWAeWxVGI0lltJmWtEY\n"
"K6er8VqqWot3nqmWMXogrgRLggv/NbbooQIDAQAB\n"
"-----END RSA PUBLIC KEY-----");
serverPublicKeysFingerprints.push_back(0xaeae98e13cd7f94fLL);
serverPublicKeys.push_back("-----BEGIN RSA PUBLIC KEY-----\n"
"MIIBCgKCAQEAvmpxVY7ld/8DAjz6F6q05shjg8/4p6047bn6/m8yPy1RBsvIyvuD\n"
"uGnP/RzPEhzXQ9UJ5Ynmh2XJZgHoE9xbnfxL5BXHplJhMtADXKM9bWB11PU1Eioc\n"
"3+AXBB8QiNFBn2XI5UkO5hPhbb9mJpjA9Uhw8EdfqJP8QetVsI/xrCEbwEXe0xvi\n"
"fRLJbY08/Gp66KpQvy7g8w7VB8wlgePexW3pT13Ap6vuC+mQuJPyiHvSxjEKHgqe\n"
"Pji9NP3tJUFQjcECqcm0yV7/2d0t/pbCm+ZH1sadZspQCEPPrtbkQBlvHb4OLiIW\n"
"PGHKSMeRFvp3IWcmdJqXahxLCUS1Eh6MAQIDAQAB\n"
"-----END RSA PUBLIC KEY-----");
serverPublicKeysFingerprints.push_back(0x5a181b2235057d98LL);
} }
size_t count2 = serverPublicKeysFingerprints.size(); size_t count2 = serverPublicKeysFingerprints.size();
for (uint32_t a = 0; a < count2; a++) { for (uint32_t a = 0; a < count1; a++) {
for (uint32_t b = 0; b < count1; b++) { for (uint32_t b = 0; b < count2; b++) {
if ((uint64_t) result->server_public_key_fingerprints[b] == serverPublicKeysFingerprints[a]) { if ((uint64_t) result->server_public_key_fingerprints[a] == serverPublicKeysFingerprints[b]) {
keyFingerprint = result->server_public_key_fingerprints[b]; keyFingerprint = result->server_public_key_fingerprints[a];
key = serverPublicKeys[a]; key = serverPublicKeys[b];
break; break;
} }
} }
@ -478,15 +411,15 @@ void Handshake::processHandshakeResponse(TLObject *message, int64_t messageId) {
return; return;
} }
TL_req_DH_params *request = new TL_req_DH_params(); auto request = new TL_req_DH_params();
request->nonce = std::unique_ptr<ByteArray>(new ByteArray(authNonce)); request->nonce = std::make_unique<ByteArray>(new ByteArray(authNonce));
request->server_nonce = std::unique_ptr<ByteArray>(new ByteArray(authServerNonce)); request->server_nonce = std::make_unique<ByteArray>(new ByteArray(authServerNonce));
request->p = std::unique_ptr<ByteArray>(new ByteArray(4)); request->p = std::make_unique<ByteArray>(new ByteArray(4));
request->p->bytes[3] = (uint8_t) p; request->p->bytes[3] = (uint8_t) p;
request->p->bytes[2] = (uint8_t) (p >> 8); request->p->bytes[2] = (uint8_t) (p >> 8);
request->p->bytes[1] = (uint8_t) (p >> 16); request->p->bytes[1] = (uint8_t) (p >> 16);
request->p->bytes[0] = (uint8_t) (p >> 24); request->p->bytes[0] = (uint8_t) (p >> 24);
request->q = std::unique_ptr<ByteArray>(new ByteArray(4)); request->q = std::make_unique<ByteArray>(new ByteArray(4));
request->q->bytes[3] = (uint8_t) q; request->q->bytes[3] = (uint8_t) q;
request->q->bytes[2] = (uint8_t) (q >> 8); request->q->bytes[2] = (uint8_t) (q >> 8);
request->q->bytes[1] = (uint8_t) (q >> 16); request->q->bytes[1] = (uint8_t) (q >> 16);
@ -495,13 +428,13 @@ void Handshake::processHandshakeResponse(TLObject *message, int64_t messageId) {
TLObject *innerData; TLObject *innerData;
if (handshakeType == HandshakeTypePerm) { if (handshakeType == HandshakeTypePerm) {
TL_p_q_inner_data_dc *tl_p_q_inner_data = new TL_p_q_inner_data_dc(); auto tl_p_q_inner_data = new TL_p_q_inner_data_dc();
tl_p_q_inner_data->nonce = std::unique_ptr<ByteArray>(new ByteArray(authNonce)); tl_p_q_inner_data->nonce = std::make_unique<ByteArray>(authNonce);
tl_p_q_inner_data->server_nonce = std::unique_ptr<ByteArray>(new ByteArray(authServerNonce)); tl_p_q_inner_data->server_nonce = std::make_unique<ByteArray>(authServerNonce);
tl_p_q_inner_data->pq = std::unique_ptr<ByteArray>(new ByteArray(result->pq.get())); tl_p_q_inner_data->pq = std::make_unique<ByteArray>(new ByteArray(result->pq.get()));
tl_p_q_inner_data->p = std::unique_ptr<ByteArray>(new ByteArray(request->p.get())); tl_p_q_inner_data->p = std::make_unique<ByteArray>(new ByteArray(request->p.get()));
tl_p_q_inner_data->q = std::unique_ptr<ByteArray>(new ByteArray(request->q.get())); tl_p_q_inner_data->q = std::make_unique<ByteArray>(new ByteArray(request->q.get()));
tl_p_q_inner_data->new_nonce = std::unique_ptr<ByteArray>(new ByteArray(32)); tl_p_q_inner_data->new_nonce = std::make_unique<ByteArray>(new ByteArray(32));
if (ConnectionsManager::getInstance(currentDatacenter->instanceNum).testBackend) { if (ConnectionsManager::getInstance(currentDatacenter->instanceNum).testBackend) {
tl_p_q_inner_data->dc = 10000 + currentDatacenter->datacenterId; tl_p_q_inner_data->dc = 10000 + currentDatacenter->datacenterId;
} else { } else {
@ -511,13 +444,13 @@ void Handshake::processHandshakeResponse(TLObject *message, int64_t messageId) {
authNewNonce = new ByteArray(tl_p_q_inner_data->new_nonce.get()); authNewNonce = new ByteArray(tl_p_q_inner_data->new_nonce.get());
innerData = tl_p_q_inner_data; innerData = tl_p_q_inner_data;
} else { } else {
TL_p_q_inner_data_temp_dc *tl_p_q_inner_data_temp = new TL_p_q_inner_data_temp_dc(); auto tl_p_q_inner_data_temp = new TL_p_q_inner_data_temp_dc();
tl_p_q_inner_data_temp->nonce = std::unique_ptr<ByteArray>(new ByteArray(authNonce)); tl_p_q_inner_data_temp->nonce = std::make_unique<ByteArray>(new ByteArray(authNonce));
tl_p_q_inner_data_temp->server_nonce = std::unique_ptr<ByteArray>(new ByteArray(authServerNonce)); tl_p_q_inner_data_temp->server_nonce = std::make_unique<ByteArray>(new ByteArray(authServerNonce));
tl_p_q_inner_data_temp->pq = std::unique_ptr<ByteArray>(new ByteArray(result->pq.get())); tl_p_q_inner_data_temp->pq = std::make_unique<ByteArray>(new ByteArray(result->pq.get()));
tl_p_q_inner_data_temp->p = std::unique_ptr<ByteArray>(new ByteArray(request->p.get())); tl_p_q_inner_data_temp->p = std::make_unique<ByteArray>(new ByteArray(request->p.get()));
tl_p_q_inner_data_temp->q = std::unique_ptr<ByteArray>(new ByteArray(request->q.get())); tl_p_q_inner_data_temp->q = std::make_unique<ByteArray>(new ByteArray(request->q.get()));
tl_p_q_inner_data_temp->new_nonce = std::unique_ptr<ByteArray>(new ByteArray(32)); tl_p_q_inner_data_temp->new_nonce = std::make_unique<ByteArray>(new ByteArray(32));
if (handshakeType == HandshakeTypeMediaTemp) { if (handshakeType == HandshakeTypeMediaTemp) {
if (ConnectionsManager::getInstance(currentDatacenter->instanceNum).testBackend) { if (ConnectionsManager::getInstance(currentDatacenter->instanceNum).testBackend) {
tl_p_q_inner_data_temp->dc = -(10000 + currentDatacenter->datacenterId); tl_p_q_inner_data_temp->dc = -(10000 + currentDatacenter->datacenterId);
@ -538,29 +471,79 @@ void Handshake::processHandshakeResponse(TLObject *message, int64_t messageId) {
} }
uint32_t innerDataSize = innerData->getObjectSize(); uint32_t innerDataSize = innerData->getObjectSize();
uint32_t additionalSize = innerDataSize + SHA_DIGEST_LENGTH < 255 ? 255 - (innerDataSize + SHA_DIGEST_LENGTH) : 0; if (innerDataSize > 144) {
NativeByteBuffer *innerDataBuffer = BuffersStorage::getInstance().getFreeBuffer(innerDataSize + additionalSize + SHA_DIGEST_LENGTH); if (LOGS_ENABLED) DEBUG_E("account%u dc%u handshake: inner data too large %d, type = %d", currentDatacenter->instanceNum, currentDatacenter->datacenterId, innerDataSize, handshakeType);
innerDataBuffer->position(SHA_DIGEST_LENGTH); delete innerData;
beginHandshake(false);
return;
}
uint32_t keySize = 32;
uint32_t ivSize = 32;
uint32_t paddedDataSize = 192;
uint32_t encryptedDataSize = keySize + paddedDataSize + SHA256_DIGEST_LENGTH;
uint32_t additionalSize = innerDataSize < paddedDataSize ? paddedDataSize - innerDataSize : 0;
NativeByteBuffer *innerDataBuffer = BuffersStorage::getInstance().getFreeBuffer(encryptedDataSize + paddedDataSize + ivSize + SHA256_DIGEST_LENGTH);
innerDataBuffer->position(encryptedDataSize);
innerData->serializeToStream(innerDataBuffer); innerData->serializeToStream(innerDataBuffer);
delete innerData; delete innerData;
SHA1(innerDataBuffer->bytes() + SHA_DIGEST_LENGTH, innerDataSize, innerDataBuffer->bytes());
if (additionalSize != 0) {
RAND_bytes(innerDataBuffer->bytes() + SHA_DIGEST_LENGTH + innerDataSize, additionalSize);
}
BIO *keyBio = BIO_new(BIO_s_mem()); BIO *keyBio = BIO_new(BIO_s_mem());
BIO_write(keyBio, key.c_str(), (int) key.length()); BIO_write(keyBio, key.c_str(), (int) key.length());
RSA *rsaKey = PEM_read_bio_RSAPublicKey(keyBio, NULL, NULL, NULL); RSA *rsaKey = PEM_read_bio_RSAPublicKey(keyBio, nullptr, nullptr, nullptr);
BIO_free(keyBio); BIO_free(keyBio);
while (true) {
RAND_bytes(innerDataBuffer->bytes() + encryptedDataSize + innerDataSize, additionalSize);
for (uint32_t i = 0; i < paddedDataSize; i++) {
innerDataBuffer->bytes()[keySize + i] = innerDataBuffer->bytes()[encryptedDataSize + paddedDataSize - i - 1];
}
RAND_bytes(innerDataBuffer->bytes(), keySize);
SHA256_CTX sha256Ctx;
SHA256_Init(&sha256Ctx);
SHA256_Update(&sha256Ctx, innerDataBuffer->bytes(), keySize);
SHA256_Update(&sha256Ctx, innerDataBuffer->bytes() + encryptedDataSize, paddedDataSize);
SHA256_Final(innerDataBuffer->bytes() + keySize + paddedDataSize, &sha256Ctx);
memset(innerDataBuffer->bytes() + encryptedDataSize + paddedDataSize, 0, ivSize);
Datacenter::aesIgeEncryption(innerDataBuffer->bytes() + keySize, innerDataBuffer->bytes(), innerDataBuffer->bytes() + encryptedDataSize + paddedDataSize, true, true, paddedDataSize + SHA256_DIGEST_LENGTH);
SHA256_Init(&sha256Ctx);
SHA256_Update(&sha256Ctx, innerDataBuffer->bytes() + keySize, paddedDataSize + SHA256_DIGEST_LENGTH);
SHA256_Final(innerDataBuffer->bytes() + encryptedDataSize + paddedDataSize + ivSize, &sha256Ctx);
for (uint32_t i = 0; i < keySize; i++) {
innerDataBuffer->bytes()[i] ^= innerDataBuffer->bytes()[encryptedDataSize + paddedDataSize + ivSize + i];
}
bool ok = false;
size_t resLen = BN_bn2bin(rsaKey->n, innerDataBuffer->bytes() + encryptedDataSize);
const auto shift = (256 - resLen);
for (auto i = 0; i != 256; ++i) {
const auto a = innerDataBuffer->bytes()[i];
const auto b = (i < shift) ? 0 : innerDataBuffer->bytes()[encryptedDataSize + i - shift];
if (a > b) {
break;
} else if (a < b) {
ok = true;
break;
}
}
if (ok) {
break;
}
}
if (bnContext == nullptr) { if (bnContext == nullptr) {
bnContext = BN_CTX_new(); bnContext = BN_CTX_new();
} }
BIGNUM *a = BN_bin2bn(innerDataBuffer->bytes(), innerDataBuffer->limit(), NULL); BIGNUM *a = BN_bin2bn(innerDataBuffer->bytes(), encryptedDataSize, nullptr);
BIGNUM *r = BN_new(); BIGNUM *r = BN_new();
BN_mod_exp(r, a, rsaKey->e, rsaKey->n, bnContext); BN_mod_exp(r, a, rsaKey->e, rsaKey->n, bnContext);
uint32_t size = BN_num_bytes(r); uint32_t size = BN_num_bytes(r);
ByteArray *rsaEncryptedData = new ByteArray(size >= 256 ? size : 256); auto rsaEncryptedData = new ByteArray(size >= 256 ? size : 256);
size_t resLen = BN_bn2bin(r, rsaEncryptedData->bytes); size_t resLen = BN_bn2bin(r, rsaEncryptedData->bytes);
if (256 - resLen > 0) { if (256 - resLen > 0) {
memset(rsaEncryptedData->bytes + resLen, 0, 256 - resLen); memset(rsaEncryptedData->bytes + resLen, 0, 256 - resLen);
@ -911,7 +894,7 @@ void Handshake::processHandshakeResponse(TLObject *message, int64_t messageId) {
} }
void Handshake::sendAckRequest(int64_t messageId) { void Handshake::sendAckRequest(int64_t messageId) {
/*TL_msgs_ack *msgsAck = new TL_msgs_ack(); /*auto msgsAck = new TL_msgs_ack();
msgsAck->msg_ids.push_back(messageId); msgsAck->msg_ids.push_back(messageId);
sendRequestData(msgsAck, false);*/ sendRequestData(msgsAck, false);*/
} }
@ -923,10 +906,10 @@ TLObject *Handshake::getCurrentHandshakeRequest() {
void Handshake::saveCdnConfigInternal(NativeByteBuffer *buffer) { void Handshake::saveCdnConfigInternal(NativeByteBuffer *buffer) {
buffer->writeInt32(1); buffer->writeInt32(1);
buffer->writeInt32((int32_t) cdnPublicKeys.size()); buffer->writeInt32((int32_t) cdnPublicKeys.size());
for (std::map<int32_t, std::string>::iterator iter = cdnPublicKeys.begin(); iter != cdnPublicKeys.end(); iter++) { for (auto & cdnPublicKey : cdnPublicKeys) {
buffer->writeInt32(iter->first); buffer->writeInt32(cdnPublicKey.first);
buffer->writeString(iter->second); buffer->writeString(cdnPublicKey.second);
buffer->writeInt64(cdnPublicKeysFingerprints[iter->first]); buffer->writeInt64(cdnPublicKeysFingerprints[cdnPublicKey.first]);
} }
} }
@ -934,7 +917,7 @@ void Handshake::saveCdnConfig(Datacenter *datacenter) {
if (cdnConfig == nullptr) { if (cdnConfig == nullptr) {
cdnConfig = new Config(datacenter->instanceNum, "cdnkeys.dat"); cdnConfig = new Config(datacenter->instanceNum, "cdnkeys.dat");
} }
thread_local static NativeByteBuffer *sizeCalculator = new NativeByteBuffer(true); thread_local static auto sizeCalculator = new NativeByteBuffer(true);
sizeCalculator->clearCapacity(); sizeCalculator->clearCapacity();
saveCdnConfigInternal(sizeCalculator); saveCdnConfigInternal(sizeCalculator);
NativeByteBuffer *buffer = BuffersStorage::getInstance().getFreeBuffer(sizeCalculator->capacity()); NativeByteBuffer *buffer = BuffersStorage::getInstance().getFreeBuffer(sizeCalculator->capacity());
@ -978,11 +961,11 @@ void Handshake::loadCdnConfig(Datacenter *datacenter) {
} }
} }
loadingCdnKeys = true; loadingCdnKeys = true;
TL_help_getCdnConfig *request = new TL_help_getCdnConfig(); auto request = new TL_help_getCdnConfig();
ConnectionsManager::getInstance(datacenter->instanceNum).sendRequest(request, [&, datacenter](TLObject *response, TL_error *error, int32_t networkType, int64_t responseTime) { ConnectionsManager::getInstance(datacenter->instanceNum).sendRequest(request, [&, datacenter](TLObject *response, TL_error *error, int32_t networkType, int64_t responseTime) {
if (response != nullptr) { if (response != nullptr) {
TL_cdnConfig *config = (TL_cdnConfig *) response; auto config = (TL_cdnConfig *) response;
size_t count = config->public_keys.size(); size_t count = config->public_keys.size();
BIO *keyBio = BIO_new(BIO_s_mem()); BIO *keyBio = BIO_new(BIO_s_mem());
NativeByteBuffer *buffer = BuffersStorage::getInstance().getFreeBuffer(1024); NativeByteBuffer *buffer = BuffersStorage::getInstance().getFreeBuffer(1024);
@ -992,7 +975,7 @@ void Handshake::loadCdnConfig(Datacenter *datacenter) {
cdnPublicKeys[publicKey->dc_id] = publicKey->public_key; cdnPublicKeys[publicKey->dc_id] = publicKey->public_key;
BIO_write(keyBio, publicKey->public_key.c_str(), (int) publicKey->public_key.length()); BIO_write(keyBio, publicKey->public_key.c_str(), (int) publicKey->public_key.length());
RSA *rsaKey = PEM_read_bio_RSAPublicKey(keyBio, NULL, NULL, NULL); RSA *rsaKey = PEM_read_bio_RSAPublicKey(keyBio, nullptr, nullptr, nullptr);
int nBytes = BN_num_bytes(rsaKey->n); int nBytes = BN_num_bytes(rsaKey->n);
int eBytes = BN_num_bytes(rsaKey->e); int eBytes = BN_num_bytes(rsaKey->e);

View File

@ -47,6 +47,7 @@ public:
int64_t startTimeMillis = 0; int64_t startTimeMillis = 0;
int32_t minStartTime = 0; int32_t minStartTime = 0;
int32_t lastResendTime = 0; int32_t lastResendTime = 0;
bool isResending = false;
int32_t instanceNum = 0; int32_t instanceNum = 0;
uint32_t serverFailureCount = 0; uint32_t serverFailureCount = 0;
TLObject *rawRequest; TLObject *rawRequest;

View File

@ -0,0 +1,55 @@
# Contributors to the OpenH264 project
Patrick Ai
Sijia Chen
ZhaoZheng Chu
Paley Du
Martin Ettl
Andreas Gal
Xu Guang
Licai Guo
Yi Guo
Horace Huang
Steven Huang
Ethan Hugg
Cullen Jennings
Zhaofeng Jia
Derrick Jin
Jesse Li
Jifei Li
Kai Li
Karina Li
Matt Li
Xiang Li
Bourne Ling
Alex Liu
Wayne Liu
Varun Patil
Eric Rescorla
Adam Roach
Sawyer Shan
Siping Tao
Martin Storsjö
Brion Vibber
James Wang
Juanny Wang
Zhiliang Wang
Hervé Willems
Gregory J Wolfe
Katherine Wu
Guang Xu
Jeffery Xu
Gang Yang
Li Yao
Jiessie Zhang
Rory Zhang
Volvet Zhang
Ling Zhu
James Zhu
Dong Zhang
Haibo Zhu
Huade Shi

View File

@ -0,0 +1,23 @@
Copyright (c) 2013, Cisco Systems
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -0,0 +1,211 @@
OpenH264
========
OpenH264 is a codec library which supports H.264 encoding and decoding. It is suitable for use in real time applications such as WebRTC. See http://www.openh264.org/ for more details.
Encoder Features
----------------
- Constrained Baseline Profile up to Level 5.2 (Max frame size is 36864 macro-blocks)
- Arbitrary resolution, not constrained to multiples of 16x16
- Rate control with adaptive quantization, or constant quantization
- Slice options: 1 slice per frame, N slices per frame, N macroblocks per slice, or N bytes per slice
- Multiple threads automatically used for multiple slices
- Temporal scalability up to 4 layers in a dyadic hierarchy
- Simulcast AVC up to 4 resolutions from a single input
- Spatial simulcast up to 4 resolutions from a single input
- Long Term Reference (LTR) frames
- Memory Management Control Operation (MMCO)
- Reference picture list modification
- Single reference frame for inter prediction
- Multiple reference frames when using LTR and/or 3-4 temporal layers
- Periodic and on-demand Instantaneous Decoder Refresh (IDR) frame insertion
- Dynamic changes to bit rate, frame rate, and resolution
- Annex B byte stream output
- YUV 4:2:0 planar input
Decoder Features
----------------
- Constrained Baseline Profile up to Level 5.2 (Max frame size is 36864 macro-blocks)
- Arbitrary resolution, not constrained to multiples of 16x16
- Single thread for all slices
- Long Term Reference (LTR) frames
- Memory Management Control Operation (MMCO)
- Reference picture list modification
- Multiple reference frames when specified in Sequence Parameter Set (SPS)
- Annex B byte stream input
- YUV 4:2:0 planar output
OS Support
----------
- Windows 64-bit and 32-bit
- Mac OS X 64-bit and 32-bit
- Linux 64-bit and 32-bit
- Android 64-bit and 32-bit
- iOS 64-bit and 32-bit
- Windows Phone 32-bit
Processor Support
-----------------
- Intel x86 optionally with MMX/SSE (no AVX yet, help is welcome)
- ARMv7 optionally with NEON, AArch64 optionally with NEON
- Any architecture using C/C++ fallback functions
Building the Library
--------------------
NASM needs to be installed for the assembly code: version 2.10.06 or above is required. NASM can be downloaded from http://www.nasm.us/.
For Mac OS X 64-bit, NASM needs to be below version 2.11.08, as NASM 2.11.08 introduces errors when using RIP-relative addresses on Mac OS X 64-bit.
To build the arm assembly for Windows Phone, gas-preprocessor is required. It can be downloaded from git://git.libav.org/gas-preprocessor.git
For Android Builds
------------------
To build for the Android platform, you need to install the Android SDK and NDK. You also need to export `**ANDROID_SDK**/tools` to PATH. On Linux, this can be done by
export PATH=**ANDROID_SDK**/tools:$PATH
The codec and demo can be built by
make OS=android NDKROOT=**ANDROID_NDK** TARGET=**ANDROID_TARGET**
Valid `**ANDROID_TARGET**` can be found in `**ANDROID_SDK**/platforms`, such as `android-12`.
You can also set `ARCH`, `NDKLEVEL` according to your device and NDK version.
`ARCH` specifies the architecture of android device. Currently `arm`, `arm64`, `x86` and `x86_64` are supported, the default is `arm`. (`mips` and `mips64` can also be used, but there's no specific optimization for those architectures.)
`NDKLEVEL` specifies android api level, the default is 12. Available possibilities can be found in `**ANDROID_NDK**/platforms`, such as `android-21` (strip away the `android-` prefix).
By default these commands build for the `armeabi-v7a` ABI. To build for the other android
ABIs, add `ARCH=arm64`, `ARCH=x86`, `ARCH=x86_64`, `ARCH=mips` or `ARCH=mips64`.
To build for the older `armeabi` ABI (which has armv5te as baseline), add `APP_ABI=armeabi` (`ARCH=arm` is implicit).
To build for 64-bit ABI, such as `arm64`, explicitly set `NDKLEVEL` to 21 or higher.
For iOS Builds
--------------
You can build the libraries and demo applications using xcode project files
located in `codec/build/iOS/dec` and `codec/build/iOS/enc`.
You can also build the libraries (but not the demo applications) using the
make based build system from the command line. Build with
make OS=ios ARCH=**ARCH**
Valid values for `**ARCH**` are the normal iOS architecture names such as
`armv7`, `armv7s`, `arm64`, and `i386` and `x86_64` for the simulator.
Another settable iOS specific parameter
is `SDK_MIN`, specifying the minimum deployment target for the built library.
For other details on building using make on the command line, see
'For All Platforms' below.
For Linux Builds
--------------
You can build the libraries (but not the demo applications) using the
make based build system from the command line. Build with
make OS=linux ARCH=**ARCH**
You can set `ARCH` according to your Linux device.
`ARCH` specifies the architecture of the device. Currently `arm`, `arm64`, `x86` and `x86_64` are supported.
NOTICE:
If your computer has an x86 architecture, then to build the library for use on an arm/aarch64 machine you may need to use a cross-compiler, for example:
make OS=linux CC=aarch64-linux-gnu-gcc CXX=aarch64-linux-gnu-g++ ARCH=arm64
or
make OS=linux CC=arm-linux-gnueabi-gcc CXX=arm-linux-gnueabi-g++ ARCH=arm
For Windows Builds
------------------
Our Windows builds use MinGW which can be downloaded from http://www.mingw.org/
To build with gcc, add the MinGW bin directory (e.g. `/c/MinGW/bin`) to your path and follow the 'For All Platforms' instructions below.
To build with Visual Studio you will need to set up your path to run cl.exe. The easiest way is to start MSYS from a developer command line session. Instructions can be found at http://msdn.microsoft.com/en-us/library/ms229859(v=vs.110).aspx. If you need to do it by hand here is an example from a Windows 64bit install of VS2012:
export PATH="$PATH:/c/Program Files (x86)/Microsoft Visual Studio 11.0/VC/bin:/c/Program Files (x86)/Microsoft Visual Studio 11.0/Common7/IDE"
You will also need to set your INCLUDE and LIB paths to point to your VS and SDK installs. Something like this, again from Win64 with VS2012 (note the use of Windows-style paths here).
export INCLUDE="C:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\include;C:\Program Files (x86)\Windows Kits\8.0\Include\um;C:\Program Files (x86)\Windows Kits\8.0\Include\shared"
export LIB="C:\Program Files (x86)\Windows Kits\8.0\Lib\Win8\um\x86;C:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\lib"
Then add `OS=msvc` to the make line of the 'For All Platforms' instructions.
For Windows Phone Builds
------------------------
Follow the instructions above for normal Windows builds, but use `OS=msvc-wp`
instead of `OS=msvc`. You will also need gas-preprocessor (as mentioned above under
"Building the Library").
If building for Windows Phone with MSVC 2013, there's no included bat file that sets the lib paths to the Windows Phone kit, but that can be done with a command like this:
export LIB="c:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\lib\store\arm;c:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\lib\arm;c:\Program Files (x86)\Windows Phone Kits\8.1\lib\arm"
This is only necessary for building the DLL; the static library can be built without setting this.
Note, only Windows Phone 8.1 or newer is supported, 8.0 is no longer supported.
For All Platforms
-------------------
Using make
----------
From the main project directory:
- `make` for automatically detecting architecture and building accordingly
- `make ARCH=i386` for x86 32-bit builds
- `make ARCH=x86_64` for x86 64-bit builds
- `make V=No` for a silent build (not showing the actual compiler commands)
- `make DEBUGSYMBOLS=True` to build two libraries: the normal library, and one with the debugging symbol table entries (those created by the -g option) removed
The command line programs `h264enc` and `h264dec` will appear in the main project directory.
A shell script to run the command-line apps is in `testbin/CmdLineExample.sh`
Usage information can be found in `testbin/CmdLineReadMe`
Using meson
-----------
Meson build definitions have been added, and are known to work on Linux
and Windows, for x86 and x86 64-bit.
See <http://mesonbuild.com/Installing.html> for instructions on how to
install meson, then:
``` shell
meson builddir
ninja -C builddir
```
Run the tests with:
``` shell
meson test -C builddir -v
```
Install with:
``` shell
ninja -C builddir install
```
Using the Source
----------------
- `codec` - encoder, decoder, console (test app), build (makefile, vcproj)
- `build` - scripts for Makefile build system
- `test` - GTest unittest files
- `testbin` - autobuild scripts, test app config files
- `res` - yuv and bitstream test files
Known Issues
------------
See the issue tracker on https://github.com/cisco/openh264/issues
- Encoder errors when resolution exceeds 3840x2160
- Encoder errors when compressed frame size exceeds half uncompressed size
- Decoder errors when compressed frame size exceeds 1MB
- Encoder RC requires frame skipping to be enabled to hit the target bitrate,
if frame skipping is disabled the target bitrate may be exceeded
License
-------
BSD, see `LICENSE` file for details.

View File

@ -0,0 +1,346 @@
Releases
-----------
v2.1.0
------
- Experimental support for multi-thread decoding (disabled by default, and may result in random problems if enabled)
- Assembly optimization for loongson platform
- Update meson version to 5
- Some minor bug fixes
v2.0.0
------
- B-frame decoding support for Main and High Profile with two test cases
- Add support for loongson(https://en.wikipedia.org/wiki/Loongson) platform
- Add clang support for arm/arm64/x86 for NDK version over 17
- Enable stack protector
- Add some test cases
- Avoid using C++/CX code for threads for Windows Phone/Windows Store/UWP
- Remove extra visual studio projects for the decoder
- Remove check for working compiler in NDK
- Bug fixes
v1.8.0
------
- Add meson build for Linux/Windows platform
- Disable background detection for screen route
- Add a workaround for Visual Studio 2013 C++ x64 compiler bug on AVX2. That bug will cause crash and has been fixed in Visual Studio 2014
- Change the default profile from baseline to high if user does not set it and CABAC is specified
- Skip frames that are marked as IDR due to scene change and simultaneously marked as skip frame to reduce bit rate
- Refine threshold calculation algorithms for rate control in lower frame rate to get better effect
- Encoder returns a specific return value instead of uninitializing the encoder when the input resolution is invalid
- Refine strategy on level change to avoid frequent IDR. Encoder will not be reset if level is changed to a smaller one
- Support to set the min and max QP values on screen content mode
- Fix a memory issue that may cause encoder crash when temporal layer change
- Corrected some statistics information
- Refine error concealment algorithms to improve user experience
- Support getting information on whether the current output picture is a reference picture on the decoder side
- Bug fix for decoder when 8x8 prediction mode is enabled on the input bitstream
- Enable NEON for ChromeOS devices
- Support for Fuchsia operating system
- Support for building arm64 with MSVC
- Remove some warnings when building with MSVC
- Fix clang compiler error when building arm assembly functions
- Bug fixes for unit test
v1.7.0
------
- Changed SPS/PPS strategy option name. See enum ENCODER_OPTION
- Changed NAL size length parameter from static array to pointer to support more NALs. See struct SParserBsInfo
- Changed semaphores to condition variables on apple platform
- Changed version update mechanism to Major.Minor.Patch, like 1.7.0
- Supported to force IDR independently for each layer in simulcast AVC case.See API ForceIntraFrame()
- Supported LTR request independently for each layer in simulcast AVC case.See struct SLTRRecoverRequest and SLTRMarkingFeedback
- Supported to set sample aspect ratio in VUI on encoder side. See struct SSpatialLayerConfig
- Supported to set profile and level, changed the default level to 4.1 if the user doesn't set it. See enum ELevelIdc
- Supported to get profile and level info on decoder side.See enum DECODER_OPTION
- Supported for enable/disable AVX2 build option. Build option: HAVE_AVX2
- Supported to set decoder statistics log interval, Add DECODER_OPTION_STATISTICS_LOG_INTERVAL.See DECODER_OPTION.
- Supported for AU delimiter NAL on decoder side. AU delimiter refers to section 7.3.2.4
- Supported for x86 PIC assembly and build option. Build option: ENABLEPIC. git issues:#2263 #2534
- Supported for Cygwin x86_64 build
- Supported to get sample aspect ratio by GetOption on decoder. Add option: DECODER_OPTION_GET_SAR_INFO
- Set constraint_set4_flag constraint_set5_flag to align to CHP definition in latest H264 standard
- Improved VUI support on decoder side
- Improved decoder statistics info output
- Refined the return value when failed in memory allocation
- Added SSSE3 motion compensation routines
- Added AVX2 motion compensation routines
- Optimization on some of SSE2/MMX functions
- Refactor rate control for RC_BUFFERBASED_MODE and RC_QUALITY_MODE mode
- Added more unit tests for random resolution input,slice mode switch,profile/level setting
- Refined logs
- Bug fixes for 4:0:0 format support on decoder
- Bug fixes for complexity calculation for screen content mode
- Bug fixes for loadbalancing turn on, git issue:#2618
- Bug fixes for parser subsps, scaling list, parser longer bitstream
v1.6.0
------
- Adjusted the encoder API structures
- Removed the unused data format in decoder API
- Encoder support of simulcast AVC
- Added support of video signal type present information
- Added support of encoder load-balancing
- Improved encoder multi-threads, rate control and down-sampling
- Fixed the frame size constraint in encoder
- Bug fixes for rate control, multi-threading, simulcasting in encoder
- Bug fixes for interface call, return value check, memory leak in decoder
- Bug fixes for UT and statistic information
- Bug fixes for assembly code
- Remove the unused and redundant code
- Improvements on UT, memory allocation failed protection, error-protection in decoder, input parameters checking in encoder, assembly for AVX2 support, assembly code performance, logging and documentation
- Correct some typos in source code and documents
v1.5.3
------
- Bug fixes for GMP Plugin
v1.5.2
------
- Fix GMP Plugin causing the Browser crash on Android
v1.5.1
------
- Bug fixes for GMP Plugin
v1.5.0
------
- Correct a typo in codec return value (github issue#2046, cmUnkonwReason -> cmUnknownReason)
- Added Codec demo and auto build script for WP8
- Decoder support of 'Constrained High Profile' of H.264
- Encoder support of CABAC of H.264
- Encoder support of input frame rate 60
- Improved syntax of gaps_in_frame_num_value_allowed_flag in encoder
- Improved memory usage for multi-threading in encoder
- Added VUI info for base layer in encoder
- Added encoder interface to get external setting of iMaxQp and iMinQp for rate control
- Bug fixes for Rate Control, multi-threading and simulcasting in encoder
- Bug fixes for NoDelay API, ParseOnly functions, error-concealment off function and error-detection in decoder
- Bug fixes for UT
- Fixes to avoid valgrind warnings, potential crash and calculation overflow
- Merged files for decoder/encoder and remove unused files
- Improvements on build scripts, UT, error-protection in decoder, input param checking in encoder, assembly for 64bit support, downsampling, logging and documentation
Note:
'Constrained High Profile' = 'Constrained Baseline Profile' plus:
- CABAC
- Intra 8x8 mode support
- 8x8 transform
- QP scaling matrices
- QP per chroma component
- Mono 4:0:0 (experimental)
- Weighted prediction
v1.4.0
------
- Decoder new interface of DecodeFrameNoDelay
- Added new encoder and decoder statistics
- Added option for generating pdb in windows builds
- Added new rate control mode (RC_TIMESTAMP_MODE) for inconstant frame rate input
- Added new Sps/Pps strategies for real-time video
- Added support for simulcast avc
- Improvements in code structure, assembly, input parameter checking, logging, UT and comments
- In gmp-openh264, return decoder error correctly and other fixes
- Decoder bug fixes for when Error Concealment is disabled
- Bug fixes for ParseOnly functions
- Bug fixes for encoding large frame size (>32767MBs)
- Fixes to avoid valgrind warnings, potential crash and calculation overflow
-----------
v1.3.1
------
- Fixed and enhanced protection to avoid crash when reading lossy bitstreams
- Adjust the default mode of Error Concealment used by gmp-openh264
-----------
v1.3.0
------
- Removed manual API document, now using wiki: https://github.com/cisco/openh264/wiki (0af48e5 for v1.3.0)
- Added API version in API header files
- Added pkg-config file
- Added decoder support of parsing only (bParseOnly) for only parsing bit stream but not decoding
- Added timestamp and max nal size in gmp-openh264.cpp when calling encoding
- Added timestamp info in decoder input and return structure
- Added support of level 9 in decoder
- Added total length of the encoded frame in encoder return structure
- Added SetOption(ENCODER_OPTION_SVC_ENCODE_PARAM_BASE,&base) for encoder
- Set constraint set 0 and 1 flags for non-scalable
- Improved error concealment algorithms and provide more modes of error-concealment
- Improved rate control algorithms and reference selection algorithms for screen content encoding
- Added encoder and decoder statistics interface
- Improved input parameter checking and logging
- Bug fixes, warning reductions, and test improvements
-----------
v1.2.0
------
- Add and modify encoder APIs related to rate control and screen content encoding
- Remove PauseFrame in encoder APIs
- Improve rate control and compression ratio for screen content encoding
- Improve error concealment algorithm
- Improve validation of input parameters
- Add ARM64 assembly
- bug fixes
-----------
v1.1.0
------
- Modify some APIs (see API doc for detail)
- Improve the compression ratio of screen content encoding
- ARM64 assembly support for most of core functions in encoder & decoder
- Modify error concealment logic to always return decoding error info until IDR picture comes
- fix some bugs
Binaries
-----------
These binary releases are distributed under this license:
http://www.openh264.org/BINARY_LICENSE.txt
v2.1.0
http://ciscobinary.openh264.org/libopenh264-2.1.0-android-arm.so.bz2
http://ciscobinary.openh264.org/libopenh264-2.1.0-android-arm.so.sig.bz2
http://ciscobinary.openh264.org/libopenh264-2.1.0-android-arm64.so.bz2
http://ciscobinary.openh264.org/libopenh264-2.1.0-android-arm64.so.sig.bz2
http://ciscobinary.openh264.org/libopenh264-2.1.0-ios.a.bz2
http://ciscobinary.openh264.org/libopenh264-2.1.0-ios.a.sig.bz2
http://ciscobinary.openh264.org/libopenh264-2.1.0-linux32.5.so.bz2
http://ciscobinary.openh264.org/libopenh264-2.1.0-linux32.5.so.sig.bz2
http://ciscobinary.openh264.org/libopenh264-2.1.0-linux64.5.so.bz2
http://ciscobinary.openh264.org/libopenh264-2.1.0-linux64.5.so.sig.bz2
http://ciscobinary.openh264.org/libopenh264-2.1.0-osx32.5.dylib.bz2
http://ciscobinary.openh264.org/libopenh264-2.1.0-osx64.5.dylib.bz2
http://ciscobinary.openh264.org/openh264-2.1.0-win32.dll.bz2
http://ciscobinary.openh264.org/openh264-2.1.0-win64.dll.bz2
v2.0.0
------
http://ciscobinary.openh264.org/libopenh264-2.0.0-android.so.bz2
http://ciscobinary.openh264.org/libopenh264-2.0.0-android.so.sig.bz2
http://ciscobinary.openh264.org/libopenh264-2.0.0-ios.a.bz2
http://ciscobinary.openh264.org/libopenh264-2.0.0-ios.a.sig.bz2
http://ciscobinary.openh264.org/libopenh264-2.0.0-linux32.5.so.bz2
http://ciscobinary.openh264.org/libopenh264-2.0.0-linux32.5.so.sig.bz2
http://ciscobinary.openh264.org/libopenh264-2.0.0-linux64.5.so.bz2
http://ciscobinary.openh264.org/libopenh264-2.0.0-linux64.5.so.sig.bz2
http://ciscobinary.openh264.org/libopenh264-2.0.0-osx32.5.dylib.bz2
http://ciscobinary.openh264.org/libopenh264-2.0.0-osx64.5.dylib.bz2
http://ciscobinary.openh264.org/openh264-2.0.0-win32.dll.bz2
http://ciscobinary.openh264.org/openh264-2.0.0-win64.dll.bz2
v1.8.0
------
http://ciscobinary.openh264.org/libopenh264-1.8.0-android19.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.8.0-android19.so.sig.bz2
http://ciscobinary.openh264.org/libopenh264-1.8.0-ios.a.bz2
http://ciscobinary.openh264.org/libopenh264-1.8.0-ios.a.sig.bz2
http://ciscobinary.openh264.org/libopenh264-1.8.0-linux32.4.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.8.0-linux32.4.so.sig.bz2
http://ciscobinary.openh264.org/libopenh264-1.8.0-linux64.4.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.8.0-linux64.4.so.sig.bz2
http://ciscobinary.openh264.org/libopenh264-1.8.0-osx32.4.dylib.bz2
http://ciscobinary.openh264.org/libopenh264-1.8.0-osx64.4.dylib.bz2
http://ciscobinary.openh264.org/openh264-1.8.0-win32.dll.bz2
http://ciscobinary.openh264.org/openh264-1.8.0-win64.dll.bz2
v1.7.0
------
http://ciscobinary.openh264.org/libopenh264-1.7.0-android19.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.7.0-android19.so.sig.bz2
http://ciscobinary.openh264.org/libopenh264-1.7.0-ios.a.bz2
http://ciscobinary.openh264.org/libopenh264-1.7.0-ios.a.sig.bz2
http://ciscobinary.openh264.org/libopenh264-1.7.0-linux32.4.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.7.0-linux32.4.so.sig.bz2
http://ciscobinary.openh264.org/libopenh264-1.7.0-linux64.4.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.7.0-linux64.4.so.sig.bz2
http://ciscobinary.openh264.org/libopenh264-1.7.0-osx32.4.dylib.bz2
http://ciscobinary.openh264.org/libopenh264-1.7.0-osx64.4.dylib.bz2
http://ciscobinary.openh264.org/openh264-1.7.0-win32.dll.bz2
http://ciscobinary.openh264.org/openh264-1.7.0-win64.dll.bz2
v1.6.0
------
http://ciscobinary.openh264.org/libopenh264-1.6.0-android19.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.6.0-ios.a.bz2
http://ciscobinary.openh264.org/libopenh264-1.6.0-linux32.3.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.6.0-linux64.3.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.6.0-osx32.3.dylib.bz2
http://ciscobinary.openh264.org/libopenh264-1.6.0-osx64.3.dylib.bz2
http://ciscobinary.openh264.org/openh264-1.6.0-win32msvc.dll.bz2
http://ciscobinary.openh264.org/openh264-1.6.0-win64msvc.dll.bz2
v1.5.0
------
http://ciscobinary.openh264.org/libopenh264-1.5.0-android19.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.5.0-ios.a.bz2
http://ciscobinary.openh264.org/libopenh264-1.5.0-linux32.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.5.0-linux64.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.5.0-osx32.dylib.bz2
http://ciscobinary.openh264.org/libopenh264-1.5.0-osx64.dylib.bz2
http://ciscobinary.openh264.org/openh264-1.5.0-win32msvc.dll.bz2
http://ciscobinary.openh264.org/openh264-1.5.0-win64msvc.dll.bz2
v1.4.0
------
http://ciscobinary.openh264.org/libopenh264-1.4.0-android19.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.4.0-linux32.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.4.0-linux64.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.4.0-osx32.dylib.bz2
http://ciscobinary.openh264.org/libopenh264-1.4.0-osx64.dylib.bz2
http://ciscobinary.openh264.org/openh264-1.4.0-win32msvc.dll.bz2
http://ciscobinary.openh264.org/openh264-1.4.0-win64msvc.dll.bz2
v1.3.1
------
http://ciscobinary.openh264.org/libopenh264-1.3.1-android19.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.3.1-linux32.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.3.1-linux64.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.3.1-osx32.dylib.bz2
http://ciscobinary.openh264.org/libopenh264-1.3.1-osx64.dylib.bz2
http://ciscobinary.openh264.org/openh264-1.3.1-win32msvc.dll.bz2
http://ciscobinary.openh264.org/openh264-1.3.1-win64msvc.dll.bz2
v1.3.0
------
http://ciscobinary.openh264.org/libopenh264-1.3.0-android19.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.3.0-linux32.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.3.0-linux64.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.3.0-osx32.dylib.bz2
http://ciscobinary.openh264.org/libopenh264-1.3.0-osx64.dylib.bz2
http://ciscobinary.openh264.org/openh264-1.3.0-win32msvc.dll.bz2
http://ciscobinary.openh264.org/openh264-1.3.0-win64msvc.dll.bz2
v1.2.0
------
http://ciscobinary.openh264.org/libopenh264-1.2.0-android19.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.2.0-linux32.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.2.0-linux64.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.2.0-osx32.dylib.bz2
http://ciscobinary.openh264.org/libopenh264-1.2.0-osx64.dylib.bz2
http://ciscobinary.openh264.org/openh264-1.2.0-win32msvc.dll.bz2
http://ciscobinary.openh264.org/openh264-1.2.0-win64msvc.dll.bz2
v1.1.0
------
http://ciscobinary.openh264.org/libopenh264-1.1.0-android19.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.1.0-linux32.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.1.0-linux64.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.1.0-osx32.dylib.bz2
http://ciscobinary.openh264.org/libopenh264-1.1.0-osx64.dylib.bz2
http://ciscobinary.openh264.org/openh264-1.1.0-win32msvc.dll.bz2
http://ciscobinary.openh264.org/openh264-1.1.0-win64msvc.dll.bz2
v1.0.0
------
http://ciscobinary.openh264.org/libopenh264-1.0.0-android19.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.0.0-linux32.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.0.0-linux64.so.bz2
http://ciscobinary.openh264.org/libopenh264-1.0.0-osx64.dylib.bz2
http://ciscobinary.openh264.org/openh264-1.0.0-win32msvc.dll.bz2
http://ciscobinary.openh264.org/openh264-1.0.0-win64msvc.dll.bz2

View File

@ -0,0 +1,592 @@
/*!
*@page License
*
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef WELS_VIDEO_CODEC_SVC_API_H__
#define WELS_VIDEO_CODEC_SVC_API_H__
#ifndef __cplusplus
#if defined(_MSC_VER) && (_MSC_VER < 1800)
typedef unsigned char bool;
#else
#include <stdbool.h>
#endif
#endif
#include "codec_app_def.h"
#include "codec_def.h"
#if defined(_WIN32) || defined(__cdecl)
#define EXTAPI __cdecl
#else
#define EXTAPI
#endif
/**
* @file codec_api.h
*/
/**
* @page Overview
* * This page is for openh264 codec API usage.
* * For how to use the encoder, please refer to pages EncoderUsageExample1 and EncoderUsageExample2
* * For how to use the decoder, please refer to page DecoderUsageExample
* * For more detail about ISVCEncoder, please refer to page ISVCEncoder
* * For more detail about ISVCDecoder, please refer to page ISVCDecoder
*/
/**
* @page DecoderUsageExample
*
* @brief
* * An example for using the decoder for Decoding only or Parsing only
*
* Step 1:decoder declaration
* @code
*
* //decoder declaration
* ISVCDecoder *pSvcDecoder;
* //input: encoded bitstream start position; should include start code prefix
* unsigned char *pBuf =...;
* //input: encoded bit stream length; should include the size of start code prefix
* int iSize =...;
* //output: [0~2] for Y,U,V buffer for Decoding only
* unsigned char *pData[3] =...;
* //in-out: for Decoding only: declare and initialize the output buffer info, this should never co-exist with Parsing only
* SBufferInfo sDstBufInfo;
* memset(&sDstBufInfo, 0, sizeof(SBufferInfo));
* //in-out: for Parsing only: declare and initialize the output bitstream buffer info for parse only, this should never co-exist with Decoding only
* SParserBsInfo sDstParseInfo;
* memset(&sDstParseInfo, 0, sizeof(SParserBsInfo));
* sDstParseInfo.pDstBuff = new unsigned char[PARSE_SIZE]; //In Parsing only, allocate enough buffer to save transcoded bitstream for a frame
*
* @endcode
*
* Step 2:decoder creation
* @code
* WelsCreateDecoder(&pSvcDecoder);
* @endcode
*
* Step 3:declare required parameter, used to differentiate Decoding only and Parsing only
* @code
* SDecodingParam sDecParam = {0};
* sDecParam.sVideoProperty.eVideoBsType = VIDEO_BITSTREAM_AVC;
* //for Parsing only, the assignment is mandatory
* sDecParam.bParseOnly = true;
* @endcode
*
* Step 4:initialize the parameter and decoder context, allocate memory
* @code
* pSvcDecoder->Initialize(&sDecParam);
* @endcode
*
* Step 5:do actual decoding process in slice level;
* this can be done in a loop until data ends
* @code
* //for Decoding only
* iRet = pSvcDecoder->DecodeFrameNoDelay(pBuf, iSize, pData, &sDstBufInfo);
* //or
* iRet = pSvcDecoder->DecodeFrame2(pBuf, iSize, pData, &sDstBufInfo);
* //for Parsing only
* iRet = pSvcDecoder->DecodeParser(pBuf, iSize, &sDstParseInfo);
* //decode failed
* if (iRet != 0){
* //error handling (RequestIDR or something like that)
* }
* //for Decoding only, pData can be used for render.
* if (sDstBufInfo.iBufferStatus==1){
* //output handling (pData[0], pData[1], pData[2])
* }
* //for Parsing only, sDstParseInfo can be used for, e.g., HW decoding
* if (sDstParseInfo.iNalNum > 0){
* //Hardware decoding sDstParseInfo;
* }
* //no-delay decoding can be realized by directly calling DecodeFrameNoDelay(), which is the recommended usage.
* //no-delay decoding can also be realized by directly calling DecodeFrame2() again with NULL input, as in the following. In this case, decoder would immediately reconstruct the input data. This can also be used similarly for Parsing only. Consequent decoding error and output indication should also be considered as above.
* iRet = pSvcDecoder->DecodeFrame2(NULL, 0, pData, &sDstBufInfo);
* //judge iRet, sDstBufInfo.iBufferStatus ...
* @endcode
*
* Step 6:uninitialize the decoder and memory free
* @code
* pSvcDecoder->Uninitialize();
* @endcode
*
* Step 7:destroy the decoder
* @code
* WelsDestroyDecoder(pSvcDecoder);
* @endcode
*
*/
/**
* @page EncoderUsageExample1
*
* @brief
* * An example for using encoder with basic parameter
*
* Step1:setup encoder
* @code
* ISVCEncoder* encoder_;
* int rv = WelsCreateSVCEncoder (&encoder_);
* assert (rv == 0);
* assert (encoder_ != NULL);
* @endcode
*
* Step2:initialize with basic parameter
* @code
* SEncParamBase param;
* memset (&param, 0, sizeof (SEncParamBase));
* param.iUsageType = usageType; //from EUsageType enum
* param.fMaxFrameRate = frameRate;
* param.iPicWidth = width;
* param.iPicHeight = height;
* param.iTargetBitrate = 5000000;
* encoder_->Initialize (&param);
* @endcode
*
* Step3:set option, set option during encoding process
* @code
* encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &g_LevelSetting);
* int videoFormat = videoFormatI420;
* encoder_->SetOption (ENCODER_OPTION_DATAFORMAT, &videoFormat);
* @endcode
*
* Step4: encode and store output bitstream
* @code
* int frameSize = width * height * 3 / 2;
* BufferedData buf;
* buf.SetLength (frameSize);
* assert (buf.Length() == (size_t)frameSize);
* SFrameBSInfo info;
* memset (&info, 0, sizeof (SFrameBSInfo));
* SSourcePicture pic;
* memset (&pic, 0, sizeof (SSourcePicture));
* pic.iPicWidth = width;
* pic.iPicHeight = height;
* pic.iColorFormat = videoFormatI420;
* pic.iStride[0] = pic.iPicWidth;
* pic.iStride[1] = pic.iStride[2] = pic.iPicWidth >> 1;
* pic.pData[0] = buf.data();
* pic.pData[1] = pic.pData[0] + width * height;
* pic.pData[2] = pic.pData[1] + (width * height >> 2);
* for(int num = 0;num<total_num;num++) {
* //prepare input data
* rv = encoder_->EncodeFrame (&pic, &info);
* assert (rv == cmResultSuccess);
* if (info.eFrameType != videoFrameTypeSkip) {
* //output bitstream handling
* }
* }
* @endcode
*
* Step5:teardown encoder
* @code
* if (encoder_) {
* encoder_->Uninitialize();
* WelsDestroySVCEncoder (encoder_);
* }
* @endcode
*
*/
/**
* @page EncoderUsageExample2
*
* @brief
* * An example for using the encoder with extension parameter.
* * The same operation on Step 1,3,4,5 with Example-1
*
* Step 2:initialize with extension parameter
* @code
* SEncParamExt param;
* encoder_->GetDefaultParams (&param);
* param.iUsageType = usageType;
* param.fMaxFrameRate = frameRate;
* param.iPicWidth = width;
* param.iPicHeight = height;
* param.iTargetBitrate = 5000000;
* param.bEnableDenoise = denoise;
* param.iSpatialLayerNum = layers;
* //SM_DYN_SLICE doesn't support multi-thread now
* if (sliceMode != SM_SINGLE_SLICE && sliceMode != SM_DYN_SLICE)
* param.iMultipleThreadIdc = 2;
*
* for (int i = 0; i < param.iSpatialLayerNum; i++) {
* param.sSpatialLayers[i].iVideoWidth = width >> (param.iSpatialLayerNum - 1 - i);
* param.sSpatialLayers[i].iVideoHeight = height >> (param.iSpatialLayerNum - 1 - i);
* param.sSpatialLayers[i].fFrameRate = frameRate;
* param.sSpatialLayers[i].iSpatialBitrate = param.iTargetBitrate;
*
* param.sSpatialLayers[i].sSliceCfg.uiSliceMode = sliceMode;
* if (sliceMode == SM_DYN_SLICE) {
* param.sSpatialLayers[i].sSliceCfg.sSliceArgument.uiSliceSizeConstraint = 600;
* param.uiMaxNalSize = 1500;
* }
* }
* param.iTargetBitrate *= param.iSpatialLayerNum;
* encoder_->InitializeExt (&param);
* int videoFormat = videoFormatI420;
* encoder_->SetOption (ENCODER_OPTION_DATAFORMAT, &videoFormat);
*
* @endcode
*/
#ifdef __cplusplus
/**
* @brief Encoder definition
*
* Abstract C++ interface of the encoder: every member is pure virtual.
* The declaration order of the virtual methods must not be changed, because the
* C function-pointer table ISVCEncoderVtbl declared in the non-C++ branch of this
* header lists the same entries in the same order.
*/
class ISVCEncoder {
public:
/**
* @brief Initialize the encoder
* @param pParam basic encoder parameter
* @return CM_RETURN: 0 - success; otherwise - failed;
*/
virtual int EXTAPI Initialize (const SEncParamBase* pParam) = 0;
/**
* @brief Initialize encoder by using extension parameters.
* @param pParam extension parameter for encoder
* @return CM_RETURN: 0 - success; otherwise - failed;
*/
virtual int EXTAPI InitializeExt (const SEncParamExt* pParam) = 0;
/**
* @brief Get the default extension parameters.
* If you want to change some parameters of encoder, firstly you need to get the default encoding parameters,
* after that you can change part of parameters you want to.
* @param pParam extension parameter for encoder
* @return CM_RETURN: 0 - success; otherwise - failed;
* */
virtual int EXTAPI GetDefaultParams (SEncParamExt* pParam) = 0;
/// Uninitialize the encoder
virtual int EXTAPI Uninitialize() = 0;
/**
* @brief Encode one frame
* @param kpSrcPic the pointer to the source picture (SSourcePicture); for the luminance plane pointer kpSrc,
* the chrominance data are located at:
* CbData = kpSrc + m_iMaxPicWidth * m_iMaxPicHeight;
* CrData = CbData + (m_iMaxPicWidth * m_iMaxPicHeight)/4;
* the application calling this interface needs to ensure the data at these locations is valid
* @param pBsInfo output bit stream
* @return 0 - success; otherwise -failed;
*/
virtual int EXTAPI EncodeFrame (const SSourcePicture* kpSrcPic, SFrameBSInfo* pBsInfo) = 0;
/**
* @brief Encode the parameters from output bit stream
* @param pBsInfo output bit stream
* @return 0 - success; otherwise - failed;
*/
virtual int EXTAPI EncodeParameterSets (SFrameBSInfo* pBsInfo) = 0;
/**
* @brief Force encoder to encode frame as IDR if bIDR set as true
* @param bIDR true: force encoder to encode frame as IDR frame;false, return 1 and nothing to do
* @param iLayerId id of the layer the request applies to; defaults to -1
* (presumably meaning "all layers" - TODO confirm against the implementation)
* @return 0 - success; otherwise - failed;
*/
virtual int EXTAPI ForceIntraFrame (bool bIDR, int iLayerId = -1) = 0;
/**
* @brief Set option for encoder, detail option type, please refer to enumeration ENCODER_OPTION.
* @param eOptionId identifier of the option to set
* @param pOption option for encoder such as InDataFormat, IDRInterval, SVC Encode Param, Frame Rate, Bitrate,...
* @return CM_RETURN: 0 - success; otherwise - failed;
*/
virtual int EXTAPI SetOption (ENCODER_OPTION eOptionId, void* pOption) = 0;
/**
* @brief Get option for encoder, detail option type, please refer to enumeration ENCODER_OPTION.
* @param eOptionId identifier of the option to get
* @param pOption option for encoder such as InDataFormat, IDRInterval, SVC Encode Param, Frame Rate, Bitrate,...
* @return CM_RETURN: 0 - success; otherwise - failed;
*/
virtual int EXTAPI GetOption (ENCODER_OPTION eOptionId, void* pOption) = 0;
/// Virtual destructor, so an implementation can be destroyed through an ISVCEncoder pointer
virtual ~ISVCEncoder() {}
};
/**
* @brief Decoder definition
*
* Abstract C++ interface of the decoder: every member is pure virtual.
* The declaration order of the virtual methods must not be changed, because the
* C function-pointer table ISVCDecoderVtbl declared in the non-C++ branch of this
* header lists the same entries in the same order.
*/
class ISVCDecoder {
public:
/**
* @brief Initialize decoder
* @param pParam parameter for decoder
* @return 0 - success; otherwise - failed;
*/
virtual long EXTAPI Initialize (const SDecodingParam* pParam) = 0;
/// Uninitialize the decoder
virtual long EXTAPI Uninitialize() = 0;
/**
* @brief Decode one frame
* @param pSrc the h264 stream to be decoded
* @param iSrcLen the length of h264 stream
* @param ppDst buffer pointer of decoded data (YUV)
* @param pStride output stride
* @param iWidth output width
* @param iHeight output height
* @return 0 - success; otherwise -failed;
*/
virtual DECODING_STATE EXTAPI DecodeFrame (const unsigned char* pSrc,
const int iSrcLen,
unsigned char** ppDst,
int* pStride,
int& iWidth,
int& iHeight) = 0;
/**
* @brief For slice level DecodeFrameNoDelay() (4 parameters input),
* whatever the function return value is, the output data
* of I420 format will only be available when pDstInfo->iBufferStatus == 1.
* This function will parse and reconstruct the input frame immediately if it is complete.
* It is recommended as the main decoding function for H.264/AVC format input
* @param pSrc the h264 stream to be decoded
* @param iSrcLen the length of h264 stream
* @param ppDst buffer pointer of decoded data (YUV)
* @param pDstInfo information provided to API(width, height, etc.)
* @return 0 - success; otherwise -failed;
*/
virtual DECODING_STATE EXTAPI DecodeFrameNoDelay (const unsigned char* pSrc,
const int iSrcLen,
unsigned char** ppDst,
SBufferInfo* pDstInfo) = 0;
/**
* @brief For slice level DecodeFrame2() (4 parameters input),
* whatever the function return value is, the output data
* of I420 format will only be available when pDstInfo->iBufferStatus == 1.
* (e.g., in multi-slice cases, only when the whole picture
* is completely reconstructed, this variable would be set equal to 1.)
* @param pSrc the h264 stream to be decoded
* @param iSrcLen the length of h264 stream
* @param ppDst buffer pointer of decoded data (YUV)
* @param pDstInfo information provided to API(width, height, etc.)
* @return 0 - success; otherwise -failed;
*/
virtual DECODING_STATE EXTAPI DecodeFrame2 (const unsigned char* pSrc,
const int iSrcLen,
unsigned char** ppDst,
SBufferInfo* pDstInfo) = 0;
/**
* @brief This function gets a decoded ready frame remaining in buffers after the last frame has been decoded.
* Use GetOption with option DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER to get the number of frames remaining in buffers.
* Note that it is only applicable for profile_idc != 66
* @param ppDst buffer pointer of decoded data (YUV)
* @param pDstInfo information provided to API(width, height, etc.)
* @return 0 - success; otherwise -failed;
*/
virtual DECODING_STATE EXTAPI FlushFrame (unsigned char** ppDst,
SBufferInfo* pDstInfo) = 0;
/**
* @brief This function parses the input bitstream only, and rewrites possible SVC syntax to AVC syntax
* @param pSrc the h264 stream to be decoded
* @param iSrcLen the length of h264 stream
* @param pDstInfo bit stream info
* @return 0 - success; otherwise -failed;
*/
virtual DECODING_STATE EXTAPI DecodeParser (const unsigned char* pSrc,
const int iSrcLen,
SParserBsInfo* pDstInfo) = 0;
/**
* @brief This API does not work for now!! This is for future use to support non-I420 color format output.
* @param pSrc the h264 stream to be decoded
* @param iSrcLen the length of h264 stream
* @param pDst buffer pointer of decoded data (YUV)
* @param iDstStride output stride
* @param iDstLen bit stream info
* @param iWidth output width
* @param iHeight output height
* @param iColorFormat output color format
* @return to do ...
*/
virtual DECODING_STATE EXTAPI DecodeFrameEx (const unsigned char* pSrc,
const int iSrcLen,
unsigned char* pDst,
int iDstStride,
int& iDstLen,
int& iWidth,
int& iHeight,
int& iColorFormat) = 0;
/**
* @brief Set option for decoder, detail option type, please refer to enumeration DECODER_OPTION.
* @param eOptionId identifier of the option to set
* @param pOption option for decoder such as OutDataFormat, Eos Flag, EC method, ...
* @return CM_RETURN: 0 - success; otherwise - failed;
*/
virtual long EXTAPI SetOption (DECODER_OPTION eOptionId, void* pOption) = 0;
/**
* @brief Get option for decoder, detail option type, please refer to enumeration DECODER_OPTION.
* @param eOptionId identifier of the option to get
* @param pOption option for decoder such as OutDataFormat, Eos Flag, EC method, ...
* @return CM_RETURN: 0 - success; otherwise - failed;
*/
virtual long EXTAPI GetOption (DECODER_OPTION eOptionId, void* pOption) = 0;
/// Virtual destructor, so an implementation can be destroyed through an ISVCDecoder pointer
virtual ~ISVCDecoder() {}
};
extern "C"
{
#else
/**
* @brief Plain-C form of the encoder interface: ISVCEncoder is a pointer to a constant table of
* function pointers, and every entry takes the ISVCEncoder* handle as its explicit first argument.
*/
typedef struct ISVCEncoderVtbl ISVCEncoderVtbl;
typedef const ISVCEncoderVtbl* ISVCEncoder;
struct ISVCEncoderVtbl {
int (*Initialize) (ISVCEncoder*, const SEncParamBase* pParam); ///< takes the base parameter set (SEncParamBase)
int (*InitializeExt) (ISVCEncoder*, const SEncParamExt* pParam); ///< takes the extended parameter set (SEncParamExt)
int (*GetDefaultParams) (ISVCEncoder*, SEncParamExt* pParam); ///< pParam is non-const here, unlike in InitializeExt
int (*Uninitialize) (ISVCEncoder*);
int (*EncodeFrame) (ISVCEncoder*, const SSourcePicture* kpSrcPic, SFrameBSInfo* pBsInfo); ///< source picture in, frame bit stream info out
int (*EncodeParameterSets) (ISVCEncoder*, SFrameBSInfo* pBsInfo);
int (*ForceIntraFrame) (ISVCEncoder*, bool bIDR);
int (*SetOption) (ISVCEncoder*, ENCODER_OPTION eOptionId, void* pOption); ///< eOptionId is an ENCODER_OPTION value
int (*GetOption) (ISVCEncoder*, ENCODER_OPTION eOptionId, void* pOption); ///< eOptionId is an ENCODER_OPTION value
};
/**
* @brief Plain-C form of the decoder interface: ISVCDecoder is a pointer to a constant table of
* function pointers, and every entry takes the ISVCDecoder* handle as its explicit first argument.
* Note that DecodeFrameEx takes int* here where the C++ ISVCDecoder::DecodeFrameEx takes int&.
*/
typedef struct ISVCDecoderVtbl ISVCDecoderVtbl;
typedef const ISVCDecoderVtbl* ISVCDecoder;
struct ISVCDecoderVtbl {
long (*Initialize) (ISVCDecoder*, const SDecodingParam* pParam);
long (*Uninitialize) (ISVCDecoder*);
DECODING_STATE (*DecodeFrame) (ISVCDecoder*, const unsigned char* pSrc,
const int iSrcLen,
unsigned char** ppDst,
int* pStride,
int* iWidth,
int* iHeight);
DECODING_STATE (*DecodeFrameNoDelay) (ISVCDecoder*, const unsigned char* pSrc,
const int iSrcLen,
unsigned char** ppDst,
SBufferInfo* pDstInfo);
DECODING_STATE (*DecodeFrame2) (ISVCDecoder*, const unsigned char* pSrc,
const int iSrcLen,
unsigned char** ppDst,
SBufferInfo* pDstInfo);
DECODING_STATE (*FlushFrame) (ISVCDecoder*, unsigned char** ppDst,
SBufferInfo* pDstInfo);
DECODING_STATE (*DecodeParser) (ISVCDecoder*, const unsigned char* pSrc,
const int iSrcLen,
SParserBsInfo* pDstInfo);
/* Documented on the C++ side as not working for now (reserved for non-I420 output). */
DECODING_STATE (*DecodeFrameEx) (ISVCDecoder*, const unsigned char* pSrc,
const int iSrcLen,
unsigned char* pDst,
int iDstStride,
int* iDstLen,
int* iWidth,
int* iHeight,
int* iColorFormat);
long (*SetOption) (ISVCDecoder*, DECODER_OPTION eOptionId, void* pOption);
long (*GetOption) (ISVCDecoder*, DECODER_OPTION eOptionId, void* pOption);
};
#endif
/** @brief Signature of a trace/log callback: receives a context pointer, a log level and the message text */
typedef void (*WelsTraceCallback) (void* ctx, int level, const char* string);
/** @brief Create encoder
* @param ppEncoder address of an ISVCEncoder pointer; presumably set to the created encoder on success
* @return 0 - success; otherwise - failed;
*/
int WelsCreateSVCEncoder (ISVCEncoder** ppEncoder);
/** @brief Destroy encoder
* @param pEncoder the encoder to destroy (presumably one obtained from WelsCreateSVCEncoder)
* @return void
*/
void WelsDestroySVCEncoder (ISVCEncoder* pEncoder);
/** @brief Get the capability of decoder
* @param pDecCapability decoder capability structure (SDecoderCapability) to be filled in
* @return 0 - success; otherwise - failed;
*/
int WelsGetDecoderCapability (SDecoderCapability* pDecCapability);
/** @brief Create decoder
* @param ppDecoder address of an ISVCDecoder pointer; presumably set to the created decoder on success
* @return 0 - success; otherwise - failed; (note: long, whereas WelsCreateSVCEncoder returns int)
*/
long WelsCreateDecoder (ISVCDecoder** ppDecoder);
/** @brief Destroy decoder
* @param pDecoder the decoder to destroy (presumably one obtained from WelsCreateDecoder)
* @return void
*/
void WelsDestroyDecoder (ISVCDecoder* pDecoder);
/** @brief Get codec version (returned by value)
* Note, old versions of Mingw (GCC < 4.7) are buggy and use an
* incorrect/different ABI for calling this function, making it
* incompatible with MSVC builds. WelsGetCodecVersionEx fills a
* caller-provided struct instead of returning one by value.
* @return The linked codec version
*/
OpenH264Version WelsGetCodecVersion (void);
/** @brief Get codec version through a caller-provided struct
* @param pVersion struct to fill in with the version
*/
void WelsGetCodecVersionEx (OpenH264Version* pVersion);
#ifdef __cplusplus
}
#endif
#endif//WELS_VIDEO_CODEC_SVC_API_H__

View File

@ -0,0 +1,810 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef WELS_VIDEO_CODEC_APPLICATION_DEFINITION_H__
#define WELS_VIDEO_CODEC_APPLICATION_DEFINITION_H__
/**
* @file codec_app_def.h
* @brief Data and/or structures introduced in Cisco OpenH264 application
*/
#include "codec_def.h"
/* Constants */
#define MAX_TEMPORAL_LAYER_NUM 4
#define MAX_SPATIAL_LAYER_NUM 4
#define MAX_QUALITY_LAYER_NUM 4
#define MAX_LAYER_NUM_OF_FRAME 128
#define MAX_NAL_UNITS_IN_LAYER 128 ///< predetermined here, adjust it later if need
#define MAX_RTP_PAYLOAD_LEN 1000
#define AVERAGE_RTP_PAYLOAD_LEN 800
/* With the values above, SAVED_NALUNIT_NUM_TMP evaluates to 4*4 + 1 + 4 = 21 and MAX_SLICES_NUM_TMP to (128 - 21) / 3 = 35. */
#define SAVED_NALUNIT_NUM_TMP ( (MAX_SPATIAL_LAYER_NUM*MAX_QUALITY_LAYER_NUM) + 1 + MAX_SPATIAL_LAYER_NUM ) ///< SPS/PPS + SEI/SSEI + PADDING_NAL
#define MAX_SLICES_NUM_TMP ( ( MAX_NAL_UNITS_IN_LAYER - SAVED_NALUNIT_NUM_TMP ) / 3 )
#define AUTO_REF_PIC_COUNT -1 ///< encoder selects the number of reference frame automatically; NOTE(review): expansion is an unparenthesized negative literal
#define UNSPECIFIED_BIT_RATE 0 ///< marks a bitrate as "not specified" (e.g. iMaxBitrate in SEncParamExt when no maximum is needed)
/**
* @brief Struct of OpenH264 version
*
* E.g. if the SDK version is 1.2.0.0, the major version number is 1, the minor version number is 2,
* and the revision number is 0.
*/
typedef struct _tagVersion {
unsigned int uMajor; ///< The major version number
unsigned int uMinor; ///< The minor version number
unsigned int uRevision; ///< The revision number
unsigned int uReserved; ///< The reserved number, it should be 0.
} OpenH264Version;
/**
* @brief Decoding status
*
* Apart from dsErrorFree (0), every value occupies a distinct bit.
*/
typedef enum {
/**
* Errors derived from bitstream parsing
*/
dsErrorFree = 0x00, ///< bit stream error-free
dsFramePending = 0x01, ///< need more throughput to generate a frame output
dsRefLost = 0x02, ///< layer lost at reference frame with temporal id 0
dsBitstreamError = 0x04, ///< error bitstreams (maybe broken internal frame) the decoder cared
dsDepLayerLost = 0x08, ///< a dependent layer has been lost
dsNoParamSets = 0x10, ///< no parameter set NALs involved
dsDataErrorConcealed = 0x20, ///< current data error concealed specified
dsRefListNullPtrs = 0x40, ///< ref picture list contains null ptrs within uiRefCount range
/**
* Errors derived from logic level
*/
dsInvalidArgument = 0x1000, ///< invalid argument specified
dsInitialOptExpected = 0x2000, ///< initializing operation is expected
dsOutOfMemory = 0x4000, ///< out of memory due to new request
/**
* ANY OTHERS?
*/
dsDstBufNeedExpan = 0x8000 ///< actual picture size exceeds size of dst pBuffer feed in decoder, so need expand its size
} DECODING_STATE;
/**
* @brief Option types introduced in SVC encoder application
*
* Enumerators are numbered implicitly starting from 0, so their order is part of the interface.
*/
typedef enum {
ENCODER_OPTION_DATAFORMAT = 0,
ENCODER_OPTION_IDR_INTERVAL, ///< IDR period, 0/-1 means no Intra period (only the first frame); larger than 0 means the desired IDR period, must be multiple of (2^temporal_layer)
ENCODER_OPTION_SVC_ENCODE_PARAM_BASE, ///< structure of Base Param
ENCODER_OPTION_SVC_ENCODE_PARAM_EXT, ///< structure of Extension Param
ENCODER_OPTION_FRAME_RATE, ///< maximal input frame rate, current supported range: MAX_FRAME_RATE = 30,MIN_FRAME_RATE = 1
ENCODER_OPTION_BITRATE,
ENCODER_OPTION_MAX_BITRATE,
ENCODER_OPTION_INTER_SPATIAL_PRED,
ENCODER_OPTION_RC_MODE,
ENCODER_OPTION_RC_FRAME_SKIP,
ENCODER_PADDING_PADDING, ///< 0:disable padding;1:padding
ENCODER_OPTION_PROFILE, ///< assign the profile for each layer
ENCODER_OPTION_LEVEL, ///< assign the level for each layer
ENCODER_OPTION_NUMBER_REF, ///< the number of reference frames
ENCODER_OPTION_DELIVERY_STATUS, ///< the delivery info which is a feedback from app level
ENCODER_LTR_RECOVERY_REQUEST,
ENCODER_LTR_MARKING_FEEDBACK,
ENCODER_LTR_MARKING_PERIOD,
ENCODER_OPTION_LTR, ///< 0:disable LTR;larger than 0 enable LTR; LTR number is fixed to be 2 in current encoder
ENCODER_OPTION_COMPLEXITY,
ENCODER_OPTION_ENABLE_SSEI, ///< enable SSEI: true--enable ssei; false--disable ssei
ENCODER_OPTION_ENABLE_PREFIX_NAL_ADDING, ///< enable prefix: true--enable prefix; false--disable prefix
ENCODER_OPTION_SPS_PPS_ID_STRATEGY, ///< different strategy in adjust ID in SPS/PPS: 0- constant ID, 1-additional ID, 6-mapping and additional
ENCODER_OPTION_CURRENT_PATH,
ENCODER_OPTION_DUMP_FILE, ///< dump layer reconstruct frame to a specified file
ENCODER_OPTION_TRACE_LEVEL, ///< trace info based on the trace level
ENCODER_OPTION_TRACE_CALLBACK, ///< a void (*)(void* context, int level, const char* message) function which receives log messages
ENCODER_OPTION_TRACE_CALLBACK_CONTEXT, ///< context info of trace callback
ENCODER_OPTION_GET_STATISTICS, ///< read only
ENCODER_OPTION_STATISTICS_LOG_INTERVAL, ///< log interval in millisecond
ENCODER_OPTION_IS_LOSSLESS_LINK, ///< advanced algorithmic settings
ENCODER_OPTION_BITS_VARY_PERCENTAGE ///< bit vary percentage
} ENCODER_OPTION;
/**
* @brief Option types introduced in decoder application
*
* Numbering starts at 1 (not 0) and continues implicitly, so the enumerator order is part of the interface.
*/
typedef enum {
DECODER_OPTION_END_OF_STREAM = 1, ///< end of stream flag
DECODER_OPTION_VCL_NAL, ///< feedback whether or not have VCL NAL in current AU for application layer
DECODER_OPTION_TEMPORAL_ID, ///< feedback temporal id for application layer
DECODER_OPTION_FRAME_NUM, ///< feedback current decoded frame number
DECODER_OPTION_IDR_PIC_ID, ///< feedback current frame belong to which IDR period
DECODER_OPTION_LTR_MARKING_FLAG, ///< feedback whether current frame mark a LTR
DECODER_OPTION_LTR_MARKED_FRAME_NUM, ///< feedback frame num marked by current Frame
DECODER_OPTION_ERROR_CON_IDC, ///< indicate decoder error concealment method
DECODER_OPTION_TRACE_LEVEL,
DECODER_OPTION_TRACE_CALLBACK, ///< a void (*)(void* context, int level, const char* message) function which receives log messages
DECODER_OPTION_TRACE_CALLBACK_CONTEXT,///< context info of trace callback
DECODER_OPTION_GET_STATISTICS, ///< feedback decoder statistics
DECODER_OPTION_GET_SAR_INFO, ///< feedback decoder Sample Aspect Ratio info in Vui
DECODER_OPTION_PROFILE, ///< get current AU profile info, only is used in GetOption
DECODER_OPTION_LEVEL, ///< get current AU level info,only is used in GetOption
DECODER_OPTION_STATISTICS_LOG_INTERVAL,///< set log output interval
DECODER_OPTION_IS_REF_PIC, ///< feedback current frame is ref pic or not
DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER, ///< number of frames remaining in decoder buffer when pictures are required to re-ordered into display-order.
DECODER_OPTION_NUM_OF_THREADS, ///< number of decoding threads. The maximum thread count is equal or less than lesser of (cpu core counts and 16).
} DECODER_OPTION;
/**
* @brief Error concealment methods selectable in the decoder.
*
* The numeric value of each method is spelled out so that it does not depend on declaration order.
*/
typedef enum {
  ERROR_CON_DISABLE                                   = 0,
  ERROR_CON_FRAME_COPY                                = 1,
  ERROR_CON_SLICE_COPY                                = 2,
  ERROR_CON_FRAME_COPY_CROSS_IDR                      = 3,
  ERROR_CON_SLICE_COPY_CROSS_IDR                      = 4,
  ERROR_CON_SLICE_COPY_CROSS_IDR_FREEZE_RES_CHANGE    = 5,
  ERROR_CON_SLICE_MV_COPY_CROSS_IDR                   = 6,
  ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE = 7
} ERROR_CON_IDC;
/**
* @brief Feedback value telling whether the current AU contains a VCL NAL.
*
* Values are written out explicitly instead of relying on implicit numbering.
*/
typedef enum {
  FEEDBACK_NON_VCL_NAL = 0,
  FEEDBACK_VCL_NAL     = 1,
  FEEDBACK_UNKNOWN_NAL = 2
} FEEDBACK_VCL_NAL_IN_AU;
/**
* @brief Type of layer being encoded
*/
typedef enum {
NON_VIDEO_CODING_LAYER = 0, ///< layer that is not a video coding layer
VIDEO_CODING_LAYER = 1 ///< video coding layer
} LAYER_TYPE;
/**
* @brief Spatial layer num
*
* SPATIAL_LAYER_0..3 name an individual layer; SPATIAL_LAYER_ALL has the value 4,
* which equals MAX_SPATIAL_LAYER_NUM.
*/
typedef enum {
SPATIAL_LAYER_0 = 0,
SPATIAL_LAYER_1 = 1,
SPATIAL_LAYER_2 = 2,
SPATIAL_LAYER_3 = 3,
SPATIAL_LAYER_ALL = 4
} LAYER_NUM;
/**
* @brief Enumerate the type of video bitstream which is provided to decoder
*/
typedef enum {
VIDEO_BITSTREAM_AVC = 0, ///< AVC bitstream
VIDEO_BITSTREAM_SVC = 1, ///< SVC bitstream
VIDEO_BITSTREAM_DEFAULT = VIDEO_BITSTREAM_SVC ///< default is an alias of the SVC type
} VIDEO_BITSTREAM_TYPE;
/**
* @brief Enumerate the type of key frame request
*
* Values 0-2 are recovery request types and values 3-5 are LTR marking feedback types.
* "NO_RECOVERY_REQUSET" is spelled this way in the identifier itself.
*/
typedef enum {
NO_RECOVERY_REQUSET = 0,
LTR_RECOVERY_REQUEST = 1,
IDR_RECOVERY_REQUEST = 2,
NO_LTR_MARKING_FEEDBACK = 3,
LTR_MARKING_SUCCESS = 4,
LTR_MARKING_FAILED = 5
} KEY_FRAME_REQUEST_TYPE;
/**
* @brief Structure for LTR recover request
*/
typedef struct {
unsigned int uiFeedbackType; ///< IDR request or LTR recovery request
unsigned int uiIDRPicId; ///< distinguish request from different IDR
int iLastCorrectFrameNum; ///< presumably the frame_num of the last correctly decoded frame - TODO confirm
int iCurrentFrameNum; ///< specify current decoder frame_num.
int iLayerId; ///< specify the layer for recovery request
} SLTRRecoverRequest;
/**
* @brief Structure for LTR marking feedback
*/
typedef struct {
unsigned int uiFeedbackType; ///< mark failed or successful
unsigned int uiIDRPicId; ///< distinguish request from different IDR
int iLTRFrameNum; ///< specify current decoder frame_num
int iLayerId; ///< specify the layer for LTR marking feedback
} SLTRMarkingFeedback;
/**
* @brief Structure for LTR (long term reference) configuration
*/
typedef struct {
bool bEnableLongTermReference; ///< 1: on, 0: off
int iLTRRefNum; ///< number of LTR; TODO: setting it to an arbitrary value is not supported yet
} SLTRConfig;
/**
* @brief Enumerate the type of rate control mode
*/
typedef enum {
RC_QUALITY_MODE = 0, ///< quality mode
RC_BITRATE_MODE = 1, ///< bitrate mode
RC_BUFFERBASED_MODE = 2, ///< no bitrate control,only using buffer status,adjust the video quality
RC_TIMESTAMP_MODE = 3, ///< rate control based on timestamp
RC_BITRATE_MODE_POST_SKIP = 4, ///< this is in-building RC MODE, WILL BE DELETED after algorithm tuning!
RC_OFF_MODE = -1, ///< rate control off mode
} RC_MODES;
/**
* @brief Enumerate the type of profile id
*
* The enumerators are not listed in ascending numeric order (the two scalable profiles, 83 and 86, come last).
*/
typedef enum {
PRO_UNKNOWN = 0,
PRO_BASELINE = 66,
PRO_MAIN = 77,
PRO_EXTENDED = 88,
PRO_HIGH = 100,
PRO_HIGH10 = 110,
PRO_HIGH422 = 122,
PRO_HIGH444 = 144,
PRO_CAVLC444 = 244,
PRO_SCALABLE_BASELINE = 83,
PRO_SCALABLE_HIGH = 86
} EProfileIdc;
/**
* @brief Enumerate the type of level id
*
* Each LEVEL_x_y enumerator has the value 10*x + y; the one exception is LEVEL_1_B, which is 9.
*/
typedef enum {
LEVEL_UNKNOWN = 0,
LEVEL_1_0 = 10,
LEVEL_1_B = 9,
LEVEL_1_1 = 11,
LEVEL_1_2 = 12,
LEVEL_1_3 = 13,
LEVEL_2_0 = 20,
LEVEL_2_1 = 21,
LEVEL_2_2 = 22,
LEVEL_3_0 = 30,
LEVEL_3_1 = 31,
LEVEL_3_2 = 32,
LEVEL_4_0 = 40,
LEVEL_4_1 = 41,
LEVEL_4_2 = 42,
LEVEL_5_0 = 50,
LEVEL_5_1 = 51,
LEVEL_5_2 = 52
} ELevelIdc;
/**
* @brief Enumerate the type of wels log
*
* The six levels from WELS_LOG_ERROR to WELS_LOG_RESV each occupy one bit (bits 0..5);
* WELS_LOG_LEVEL_COUNT is the number of those levels, not a level itself.
*/
enum {
WELS_LOG_QUIET = 0x00, ///< quiet mode
WELS_LOG_ERROR = 1 << 0, ///< error log iLevel
WELS_LOG_WARNING = 1 << 1, ///< Warning log iLevel
WELS_LOG_INFO = 1 << 2, ///< information log iLevel
WELS_LOG_DEBUG = 1 << 3, ///< debug log, critical algo log
WELS_LOG_DETAIL = 1 << 4, ///< per packet/frame log
WELS_LOG_RESV = 1 << 5, ///< reserved log iLevel
WELS_LOG_LEVEL_COUNT = 6,
WELS_LOG_DEFAULT = WELS_LOG_WARNING ///< default log iLevel in Wels codec
};
/**
* @brief Enumerate the type of slice mode
*
* The SSliceArgument field comments refer to these modes by number (uiSliceMode=1, 2, ...).
*/
typedef enum {
SM_SINGLE_SLICE = 0, ///< | SliceNum==1
SM_FIXEDSLCNUM_SLICE = 1, ///< | according to SliceNum | enabled dynamic slicing for multi-thread
SM_RASTER_SLICE = 2, ///< | according to SlicesAssign | need input of MB numbers each slice. In addition, if other constraint in SSliceArgument is presented, need to follow the constraints. Typically if MB num and slice size are both constrained, re-encoding may be involved.
SM_SIZELIMITED_SLICE = 3, ///< | according to SliceSize | slicing according to size, the slicing will be dynamic(have no idea about slice_nums until encoding current frame)
SM_RESERVED = 4
} SliceModeEnum;
/**
* @brief Structure for slice argument
*/
typedef struct {
SliceModeEnum uiSliceMode; ///< by default, uiSliceMode will be SM_SINGLE_SLICE
unsigned int
uiSliceNum; ///< only used when uiSliceMode=1 (SM_FIXEDSLCNUM_SLICE); uiSliceNum=0 means it is chosen automatically from the cpu core number
unsigned int
uiSliceMbNum[MAX_SLICES_NUM_TMP]; ///< only used when uiSliceMode=2 (SM_RASTER_SLICE); when =0 means setting one MB row a slice
unsigned int uiSliceSizeConstraint; ///< NOTE(review): originally "only used when uiSliceMode=4", but 4 is SM_RESERVED; presumably SM_SIZELIMITED_SLICE (3) is meant - confirm
} SSliceArgument;
/**
* @brief Video format codes (stored in uiVideoFormat of SSpatialLayerConfig).
*
* Values are spelled out explicitly; VF_NUM_ENUM is the number of format codes.
*/
typedef enum {
  VF_COMPONENT = 0,
  VF_PAL       = 1,
  VF_NTSC      = 2,
  VF_SECAM     = 3,
  VF_MAC       = 4,
  VF_UNDEF     = 5,
  VF_NUM_ENUM  = 6
} EVideoFormatSPS; // EVideoFormat is already defined/used elsewhere!
/**
* @brief Color primaries codes (stored in uiColorPrimaries of SSpatialLayerConfig).
*
* Values are spelled out explicitly; CP_NUM_ENUM is the number of codes.
*/
typedef enum {
  CP_RESERVED0 = 0,
  CP_BT709     = 1,
  CP_UNDEF     = 2,
  CP_RESERVED3 = 3,
  CP_BT470M    = 4,
  CP_BT470BG   = 5,
  CP_SMPTE170M = 6,
  CP_SMPTE240M = 7,
  CP_FILM      = 8,
  CP_BT2020    = 9,
  CP_NUM_ENUM  = 10
} EColorPrimaries;
/**
* @brief Transfer characteristics codes (stored in uiTransferCharacteristics of SSpatialLayerConfig).
*
* Values are spelled out explicitly; TRC_NUM_ENUM is the number of codes.
*/
typedef enum {
  TRC_RESERVED0    = 0,
  TRC_BT709        = 1,
  TRC_UNDEF        = 2,
  TRC_RESERVED3    = 3,
  TRC_BT470M       = 4,
  TRC_BT470BG      = 5,
  TRC_SMPTE170M    = 6,
  TRC_SMPTE240M    = 7,
  TRC_LINEAR       = 8,
  TRC_LOG100       = 9,
  TRC_LOG316       = 10,
  TRC_IEC61966_2_4 = 11,
  TRC_BT1361E      = 12,
  TRC_IEC61966_2_1 = 13,
  TRC_BT2020_10    = 14,
  TRC_BT2020_12    = 15,
  TRC_NUM_ENUM     = 16
} ETransferCharacteristics;
/**
* @brief Color matrix codes (stored in uiColorMatrix of SSpatialLayerConfig).
*
* Values are spelled out explicitly; CM_NUM_ENUM is the number of codes.
*/
typedef enum {
  CM_GBR       = 0,
  CM_BT709     = 1,
  CM_UNDEF     = 2,
  CM_RESERVED3 = 3,
  CM_FCC       = 4,
  CM_BT470BG   = 5,
  CM_SMPTE170M = 6,
  CM_SMPTE240M = 7,
  CM_YCGCO     = 8,
  CM_BT2020NC  = 9,
  CM_BT2020C   = 10,
  CM_NUM_ENUM  = 11
} EColorMatrix;
/**
* @brief Enumerate the type of sample aspect ratio
*
* ASP_EXT_SAR (255) selects an explicit ratio; see sAspectRatioExtWidth / sAspectRatioExtHeight
* in SSpatialLayerConfig, which are used when the aspect ratio idc is 255.
*/
typedef enum {
ASP_UNSPECIFIED = 0,
ASP_1x1 = 1,
ASP_12x11 = 2,
ASP_10x11 = 3,
ASP_16x11 = 4,
ASP_40x33 = 5,
ASP_24x11 = 6,
ASP_20x11 = 7,
ASP_32x11 = 8,
ASP_80x33 = 9,
ASP_18x11 = 10,
ASP_15x11 = 11,
ASP_64x33 = 12,
ASP_160x99 = 13,
ASP_EXT_SAR = 255
} ESampleAspectRatio;
/**
* @brief Structure for spatial layer configuration
*/
typedef struct {
int iVideoWidth; ///< width of picture in luminance samples of a layer
int iVideoHeight; ///< height of picture in luminance samples of a layer
float fFrameRate; ///< frame rate specified for a layer
int iSpatialBitrate; ///< target bitrate for a spatial layer, in unit of bps
int iMaxSpatialBitrate; ///< maximum bitrate for a spatial layer, in unit of bps
EProfileIdc uiProfileIdc; ///< value of profile IDC (PRO_UNKNOWN for auto-detection)
ELevelIdc uiLevelIdc; ///< value of level IDC (0 for auto-detection)
int iDLayerQp; ///< QP of this layer, going by the field name (the original comment here was a misplaced "level IDC" note) - TODO confirm exact semantics
SSliceArgument sSliceArgument; ///< slice configuration of this layer (see SSliceArgument)
// Note: members bVideoSignalTypePresent through uiColorMatrix below are also defined in SWelsSPS in parameter_sets.h.
bool bVideoSignalTypePresent; // false => do not write any of the following information to the header
unsigned char
uiVideoFormat; // EVideoFormatSPS; 3 bits in header; 0-5 => component, pal, ntsc, secam, mac, undef
bool bFullRange; // false => analog video data range [16, 235]; true => full data range [0,255]
bool bColorDescriptionPresent; // false => do not write any of the following three items to the header
unsigned char
uiColorPrimaries; // EColorPrimaries; 8 bits in header; 0 - 9 => ???, bt709, undef, ???, bt470m, bt470bg,
// smpte170m, smpte240m, film, bt2020
unsigned char
uiTransferCharacteristics; // ETransferCharacteristics; 8 bits in header; 0 - 15 => ???, bt709, undef, ???, bt470m, bt470bg, smpte170m,
// smpte240m, linear, log100, log316, iec61966-2-4, bt1361e, iec61966-2-1, bt2020-10, bt2020-12
unsigned char
uiColorMatrix; // EColorMatrix; 8 bits in header (corresponds to FFmpeg "colorspace"); 0 - 10 => GBR, bt709,
// undef, ???, fcc, bt470bg, smpte170m, smpte240m, YCgCo, bt2020nc, bt2020c
bool bAspectRatioPresent; ///< aspect ratio present in VUI
ESampleAspectRatio eAspectRatio; ///< aspect ratio idc
unsigned short sAspectRatioExtWidth; ///< use if aspect ratio idc == 255
unsigned short sAspectRatioExtHeight; ///< use if aspect ratio idc == 255
} SSpatialLayerConfig;
/**
* @brief Kind of content / usage the encoder is configured for.
*
* Values are spelled out explicitly instead of relying on implicit numbering.
*/
typedef enum {
  CAMERA_VIDEO_REAL_TIME       = 0, ///< camera video for real-time communication
  SCREEN_CONTENT_REAL_TIME     = 1, ///< screen content signal
  CAMERA_VIDEO_NON_REAL_TIME   = 2,
  SCREEN_CONTENT_NON_REAL_TIME = 3,
  INPUT_CONTENT_TYPE_ALL       = 4
} EUsageType;
/**
* @brief Encoder complexity modes, trading speed against quality.
*
* Values are spelled out explicitly instead of relying on implicit numbering.
*/
typedef enum {
  LOW_COMPLEXITY    = 0, ///< the lowest complexity, the fastest speed
  MEDIUM_COMPLEXITY = 1, ///< medium complexity, medium speed, medium quality
  HIGH_COMPLEXITY   = 2  ///< high complexity, lowest speed, high quality
} ECOMPLEXITY_MODE;
/**
* @brief Enumerate the strategies for assigning SPS/PPS ids
*/
typedef enum {
CONSTANT_ID = 0, ///< constant id in SPS/PPS
INCREASING_ID = 0x01, ///< SPS/PPS id increases at each IDR
SPS_LISTING = 0x02, ///< using SPS in the existing list if possible
SPS_LISTING_AND_PPS_INCREASING = 0x03,
SPS_PPS_LISTING = 0x06,
} EParameterSetStrategy;
// TODO: Refine the parameters definition.
/**
* @brief SVC Encoding Parameters
*
* SEncParamExt (TagEncParamExt) begins with the same six fields, in the same order.
*/
typedef struct TagEncParamBase {
EUsageType
iUsageType; ///< application type; please refer to the definition of EUsageType
int iPicWidth; ///< width of picture in luminance samples (the maximum of all layers if multiple spatial layers presents)
int iPicHeight; ///< height of picture in luminance samples (the maximum of all layers if multiple spatial layers presents)
int iTargetBitrate; ///< target bitrate desired, in unit of bps
RC_MODES iRCMode; ///< rate control mode
float fMaxFrameRate; ///< maximal input frame rate
} SEncParamBase, *PEncParamBase;
/**
* @brief SVC Encoding Parameters extension
*/
typedef struct TagEncParamExt {
EUsageType
iUsageType; ///< same as in TagEncParamBase
int iPicWidth; ///< same as in TagEncParamBase
int iPicHeight; ///< same as in TagEncParamBase
int iTargetBitrate; ///< same as in TagEncParamBase
RC_MODES iRCMode; ///< same as in TagEncParamBase
float fMaxFrameRate; ///< same as in TagEncParamBase
int iTemporalLayerNum; ///< temporal layer number, max temporal layer = 4
int iSpatialLayerNum; ///< spatial layer number,1<= iSpatialLayerNum <= MAX_SPATIAL_LAYER_NUM, MAX_SPATIAL_LAYER_NUM = 4
SSpatialLayerConfig sSpatialLayers[MAX_SPATIAL_LAYER_NUM];
ECOMPLEXITY_MODE iComplexityMode; ///< complexity mode, see ECOMPLEXITY_MODE
unsigned int uiIntraPeriod; ///< period of Intra frame
int iNumRefFrame; ///< number of reference frame used
EParameterSetStrategy
eSpsPpsIdStrategy; ///< different strategy in adjust ID in SPS/PPS: 0- constant ID, 1-additional ID, 6-mapping and additional
bool bPrefixNalAddingCtrl; ///< false:not use Prefix NAL; true: use Prefix NAL
bool bEnableSSEI; ///< false:not use SSEI; true: use SSEI -- TODO: planning to remove the interface of SSEI
bool bSimulcastAVC; ///< (when encoding more than 1 spatial layer) false: use SVC syntax for higher layers; true: use Simulcast AVC
int iPaddingFlag; ///< 0:disable padding;1:padding
int iEntropyCodingModeFlag; ///< 0:CAVLC 1:CABAC.
/* rc control */
bool bEnableFrameSkip; ///< False: don't skip frame even if VBV buffer overflow.True: allow skipping frames to keep the bitrate within limits
int iMaxBitrate; ///< the maximum bitrate, in unit of bps, set it to UNSPECIFIED_BIT_RATE if not needed
int iMaxQp; ///< the maximum QP encoder supports
int iMinQp; ///< the minimum QP encoder supports
unsigned int uiMaxNalSize; ///< the maximum NAL size. This value should be not 0 for dynamic slice mode
/*LTR settings*/
bool bEnableLongTermReference; ///< 1: on, 0: off
int iLTRRefNum; ///< the number of LTR(long term reference),TODO: not supported to set it arbitrary yet
unsigned int iLtrMarkPeriod; ///< the LTR marked period that is used in feedback.
/* multi-thread settings*/
unsigned short
iMultipleThreadIdc; ///< 1 # 0: auto(dynamic imp. internal encoder); 1: multiple threads imp. disabled; larger than 1: count number of threads;
bool bUseLoadBalancing; ///< only used when uiSliceMode=1 or 3, will change slicing of a picture during the run-time of multi-thread encoding, so the result of each run may be different
/* Deblocking loop filter */
int iLoopFilterDisableIdc; ///< 0: on, 1: off, 2: on except for slice boundaries
int iLoopFilterAlphaC0Offset; ///< AlphaOffset: valid range [-6, 6], default 0
int iLoopFilterBetaOffset; ///< BetaOffset: valid range [-6, 6], default 0
/*pre-processing feature*/
bool bEnableDenoise; ///< denoise control
bool bEnableBackgroundDetection; ///< background detection control //VAA_BACKGROUND_DETECTION //BGD cmd
bool bEnableAdaptiveQuant; ///< adaptive quantization control
bool bEnableFrameCroppingFlag; ///< enable frame cropping flag: TRUE always in application
bool bEnableSceneChangeDetect; ///< scene change detection control
bool bIsLosslessLink; ///< LTR advanced setting
} SEncParamExt;
/**
* @brief Struct describing the properties of a video bitstream.
*/
typedef struct {
unsigned int size; ///< size of the struct
VIDEO_BITSTREAM_TYPE eVideoBsType; ///< video stream type (AVC/SVC)
} SVideoProperty;
/**
* @brief SVC Decoding Parameters, reserved here and potential applicable in the future
*/
typedef struct TagSVCDecodingParam {
char* pFileNameRestructed; ///< file name of reconstructed frame used for PSNR calculation based debug
unsigned int uiCpuLoad; ///< CPU load
unsigned char uiTargetDqLayer; ///< setting target dq layer id
ERROR_CON_IDC eEcActiveIdc; ///< which error concealment method is active in the decoder (see ERROR_CON_IDC)
bool bParseOnly; ///< decoder for parse only, no reconstruction. When it is true, SPS/PPS size should not exceed SPS_PPS_BS_SIZE (128). Otherwise, it will return error info
SVideoProperty sVideoProperty; ///< video stream property
} SDecodingParam, *PDecodingParam;
/**
* @brief Bitstream information of a layer being encoded
*/
typedef struct {
unsigned char uiTemporalId; ///< temporal id of the layer
unsigned char uiSpatialId; ///< spatial id of the layer
unsigned char uiQualityId; ///< quality id of the layer
EVideoFrameType eFrameType; ///< frame type of the layer
unsigned char uiLayerType; ///< presumably a LAYER_TYPE value (video coding layer or not) - TODO confirm
/**
* The sub sequence layers are ordered hierarchically based on their dependency on each other so that any picture in a layer shall not be
* predicted from any picture on any higher layer.
*/
int iSubSeqId; ///< refer to D.2.11 Sub-sequence information SEI message semantics
int iNalCount; ///< count number of NAL coded already
int* pNalLengthInByte; ///< length of NAL size in byte from 0 to iNalCount-1
unsigned char* pBsBuf; ///< buffer of bitstream contained
} SLayerBSInfo, *PLayerBSInfo;
/**
* @brief Frame bit stream info
*/
typedef struct {
int iLayerNum; ///< presumably the number of valid entries in sLayerInfo - TODO confirm
SLayerBSInfo sLayerInfo[MAX_LAYER_NUM_OF_FRAME]; ///< per-layer bitstream info; capacity is MAX_LAYER_NUM_OF_FRAME
EVideoFrameType eFrameType; ///< frame type of the frame
int iFrameSizeInBytes; ///< size of the frame in bytes
long long uiTimeStamp; ///< timestamp of the frame (signed type despite the "ui" prefix)
} SFrameBSInfo, *PFrameBSInfo;
/**
* @brief Structure for source picture
*
* iStride and pData are parallel arrays holding one entry per plane (up to 4 planes).
*/
typedef struct Source_Picture_s {
int iColorFormat; ///< color space type
int iStride[4]; ///< stride for each plane pData
unsigned char* pData[4]; ///< plane pData
int iPicWidth; ///< luma picture width in x coordinate
int iPicHeight; ///< luma picture height in y coordinate
long long uiTimeStamp; ///< timestamp of the source picture, unit: millisecond
} SSourcePicture;
/**
* @brief Structure for bit rate info
*/
typedef struct TagBitrateInfo {
LAYER_NUM iLayer; ///< layer selector (a LAYER_NUM value)
int iBitrate; ///< the maximum bitrate
} SBitrateInfo;
/**
* @brief Structure for dump layer info
*/
typedef struct TagDumpLayer {
int iLayer; ///< index of the layer to dump
char* pFileName; ///< name of the file to dump to
} SDumpLayer;
/**
* @brief Structure for profile info in layer
*
*/
typedef struct TagProfileInfo {
int iLayer; ///< index of the layer the profile applies to
EProfileIdc uiProfileIdc; ///< the profile info
} SProfileInfo;
/**
* @brief Structure for level info in layer
*
*/
typedef struct TagLevelInfo {
int iLayer; ///< index of the layer the level applies to
ELevelIdc uiLevelIdc; ///< the level info
} SLevelInfo;
/**
* @brief Structure for delivery status
*
*/
typedef struct TagDeliveryStatus {
bool bDeliveryFlag; ///< 0: the previous frame isn't delivered,1: the previous frame is delivered
int iDropFrameType; ///< the frame type that is dropped; reserved
int iDropFrameSize; ///< the frame size that is dropped; reserved
} SDeliveryStatus;
/**
* @brief The capability of decoder, for SDP negotiation
*
* Each field comment gives the name of the corresponding SDP parameter.
*/
typedef struct TagDecoderCapability {
int iProfileIdc; ///< profile_idc
int iProfileIop; ///< profile-iop
int iLevelIdc; ///< level_idc
int iMaxMbps; ///< max-mbps
int iMaxFs; ///< max-fs
int iMaxCpb; ///< max-cpb
int iMaxDpb; ///< max-dpb
int iMaxBr; ///< max-br
bool bRedPicCap; ///< redundant-pic-cap
} SDecoderCapability;
/**
* @brief Structure for parse only output
*/
typedef struct TagParserBsInfo {
int iNalNum; ///< total NAL number in current AU
int* pNalLenInByte; ///< length of each NAL, in bytes
unsigned char* pDstBuff; ///< outputted dst buffer for parsed bitstream
int iSpsWidthInPixel; ///< required SPS width info
int iSpsHeightInPixel; ///< required SPS height info
unsigned long long uiInBsTimeStamp; ///< input BS timestamp
unsigned long long uiOutBsTimeStamp; ///< output BS timestamp
} SParserBsInfo, *PParserBsInfo;
/**
* @brief Structure for encoder statistics
*/
typedef struct TagVideoEncoderStatistics {
unsigned int uiWidth; ///< the width of encoded frame
unsigned int uiHeight; ///< the height of encoded frame
//following standard, will be 16x aligned, if there are multiple spatial, this is of the highest
float fAverageFrameSpeedInMs; ///< average_Encoding_Time
// rate control related
float fAverageFrameRate; ///< the average input frame rate, calculated since encoding starts, supposing that the input timestamp is in unit of ms
float fLatestFrameRate; ///< the input frame rate in the last second, supposing that the input timestamp is in unit of ms (? useful for checking BR, but is it easy to calculate?)
unsigned int uiBitRate; ///< sendrate in Bits per second, calculated within the set time-window
unsigned int uiAverageFrameQP; ///< the average QP of last encoded frame
unsigned int uiInputFrameCount; ///< number of input frames
unsigned int uiSkippedFrameCount; ///< number of skipped frames
unsigned int uiResolutionChangeTimes; ///< number of resolution changes
unsigned int uiIDRReqNum; ///< number of IDR requests
unsigned int uiIDRSentNum; ///< number of actual IDRs sent
unsigned int uiLTRSentNum; ///< number of LTR sent/marked
long long iStatisticsTs; ///< Timestamp of updating the statistics
unsigned long iTotalEncodedBytes; ///< total number of encoded bytes (unsigned long: width is platform dependent)
unsigned long iLastStatisticsBytes; ///< presumably the byte count at the previous statistics update - TODO confirm
unsigned long iLastStatisticsFrameCount; ///< presumably the frame count at the previous statistics update - TODO confirm
} SEncoderStatistics;
/**
* @brief Structure for decoder statistics
*/
typedef struct TagVideoDecoderStatistics {
unsigned int uiWidth; ///< the width of encode/decode frame
unsigned int uiHeight; ///< the height of encode/decode frame
float fAverageFrameSpeedInMs; ///< average_Decoding_Time
float fActualAverageFrameSpeedInMs; ///< actual average_Decoding_Time, including freezing pictures
unsigned int uiDecodedFrameCount; ///< number of decoded frames
unsigned int uiResolutionChangeTimes; ///< number of resolution changes
unsigned int uiIDRCorrectNum; ///< number of correct IDR received
//EC on related
unsigned int
uiAvgEcRatio; ///< when EC is on, the average ratio of total EC areas, can be an indicator of reconstruction quality
unsigned int
uiAvgEcPropRatio; ///< when EC is on, the rough average ratio of propagated EC areas, can be an indicator of reconstruction quality
unsigned int uiEcIDRNum; ///< number of actual unintegrity IDR or not received but eced
unsigned int uiEcFrameNum; ///< presumably the number of frames on which EC was applied - TODO confirm
unsigned int uiIDRLostNum; ///< number of whole lost IDR
unsigned int
uiFreezingIDRNum; ///< number of freezing IDR with error (partly received), under resolution change
unsigned int uiFreezingNonIDRNum; ///< number of freezing non-IDR with error
int iAvgLumaQp; ///< average luma QP. default: -1, no correct frame outputted
int iSpsReportErrorNum; ///< number of Sps Invalid report
int iSubSpsReportErrorNum; ///< number of SubSps Invalid report
int iPpsReportErrorNum; ///< number of Pps Invalid report
int iSpsNoExistNalNum; ///< number of Sps NoExist Nal
int iSubSpsNoExistNalNum; ///< number of SubSps NoExist Nal
int iPpsNoExistNalNum; ///< number of Pps NoExist Nal
unsigned int uiProfile; ///< Profile idc in syntax
unsigned int uiLevel; ///< level idc according to Annex A-1
int iCurrentActiveSpsId; ///< current active SPS id
int iCurrentActivePpsId; ///< current active PPS id
unsigned int iStatisticsLogInterval; ///< frame interval of statistics log
} SDecoderStatistics; // in building, coming soon
/**
* @brief Structure for sample aspect ratio (SAR) info in VUI
*/
typedef struct TagVuiSarInfo {
unsigned int uiSarWidth; ///< SAR width
unsigned int uiSarHeight; ///< SAR height
bool bOverscanAppropriateFlag; ///< overscan appropriate flag
} SVuiSarInfo, *PVuiSarInfo;
#endif//WELS_VIDEO_CODEC_APPLICATION_DEFINITION_H__

View File

@ -0,0 +1,216 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef WELS_VIDEO_CODEC_DEFINITION_H__
#define WELS_VIDEO_CODEC_DEFINITION_H__
/**
* @file codec_def.h
*/
/**
 * @brief Video (pixel) format identifiers
 */
typedef enum {
  /* RGB colour formats */
  videoFormatRGB      = 1,
  videoFormatRGBA     = 2,
  videoFormatRGB555   = 3,
  videoFormatRGB565   = 4,
  videoFormatBGR      = 5,
  videoFormatBGRA     = 6,
  videoFormatABGR     = 7,
  videoFormatARGB     = 8,

  /* YUV colour formats */
  videoFormatYUY2     = 20,
  videoFormatYVYU     = 21,
  videoFormatUYVY     = 22,
  videoFormatI420     = 23,         ///< the same as IYUV
  videoFormatYV12     = 24,
  videoFormatInternal = 25,         ///< only used in SVC decoder testbed
  videoFormatNV12     = 26,         ///< new format for output by DXVA decoding

  videoFormatVFlip    = 0x80000000  ///< top bit, separate from the format numbers above
} EVideoFormatType;
/**
 * @brief Video frame types
 */
typedef enum {
  videoFrameTypeInvalid = 0,  ///< encoder not ready or parameters are invalid
  videoFrameTypeIDR     = 1,  ///< IDR frame in H.264
  videoFrameTypeI       = 2,  ///< I frame type
  videoFrameTypeP       = 3,  ///< P frame type
  videoFrameTypeSkip    = 4,  ///< skip the frame based encoder kernel
  videoFrameTypeIPMixed = 5   ///< a frame where I and P slices are mixing, not supported yet
} EVideoFrameType;
/**
 * @brief Return codes
 */
typedef enum {
  cmResultSuccess   = 0,  ///< successful
  cmInitParaError   = 1,  ///< parameters are invalid
  cmUnknownReason   = 2,
  cmMallocMemeError = 3,  ///< malloc a memory error
  cmInitExpected    = 4,  ///< initial action is expected
  cmUnsupportedData = 5
} CM_RETURN;
/**
 * @brief Enumerate the NAL unit types
 */
enum ENalUnitType {
  NAL_UNKNOWN   = 0,
  NAL_SLICE     = 1,
  NAL_SLICE_DPA = 2,
  NAL_SLICE_DPB = 3,
  NAL_SLICE_DPC = 4,
  NAL_SLICE_IDR = 5,    ///< ref_idc != 0
  NAL_SEI       = 6,    ///< ref_idc == 0
  NAL_SPS       = 7,
  NAL_PPS       = 8
  ///< ref_idc == 0 for 6,9,10,11,12
};
/**
 * @brief NRI: eNalRefIdc
 */
enum ENalPriority {
  NAL_PRIORITY_DISPOSABLE = 0,
  NAL_PRIORITY_LOW        = 1,
  NAL_PRIORITY_HIGH       = 2,
  NAL_PRIORITY_HIGHEST    = 3
};
/**
 * @brief Non-zero when the NAL has the highest priority and carries a parameter set.
 *
 * The previous definition tested `eNalType == (NAL_SPS|NAL_PPS)`, i.e. the
 * single value 15, where `NAL_PPS` was evidently meant, so a PPS (type 8) was
 * never reported as a parameter set.  SPS and PPS are now both accepted.
 * Value 15 is still accepted so that every input the old macro matched keeps
 * matching (in H.264 Table 7-1 that value is the subset-SPS NAL type).
 * Arguments are parenthesised so that expressions can be passed safely.
 */
#define IS_PARAMETER_SET_NAL(eNalRefIdc, eNalType) \
( ((eNalRefIdc) == NAL_PRIORITY_HIGHEST) && \
  ((eNalType) == NAL_SPS || (eNalType) == NAL_PPS || (eNalType) == (NAL_SPS|NAL_PPS)) )
/**
 * @brief Non-zero when the NAL has the highest priority and is an IDR slice.
 */
#define IS_IDR_NAL(eNalRefIdc, eNalType) \
( ((eNalRefIdc) == NAL_PRIORITY_HIGHEST) && ((eNalType) == NAL_SLICE_IDR) )
#define FRAME_NUM_PARAM_SET (-1)
#define FRAME_NUM_IDR 0
/**
 * @brief Values of eDeblockingIdc
 */
enum {
  DEBLOCKING_IDC_0 = 0,
  DEBLOCKING_IDC_1 = 1,
  DEBLOCKING_IDC_2 = 2
};

/* Deblocking offset constants: +6 and its negation */
#define DEBLOCKING_OFFSET       (6)
#define DEBLOCKING_OFFSET_MINUS (-6)
/* Error Tools definition */
typedef unsigned short ERR_TOOL;
/**
 * @brief Error tool flags; every non-zero value below is a distinct single bit
 */
enum {
  ET_NONE     = 0,        ///< NONE Error Tools
  ET_IP_SCALE = 1 << 0,   ///< IP Scalable
  ET_FMO      = 1 << 1,   ///< Flexible Macroblock Ordering
  ET_IR_R1    = 1 << 2,   ///< Intra Refresh in predefined 2% MB
  ET_IR_R2    = 1 << 3,   ///< Intra Refresh in predefined 5% MB
  ET_IR_R3    = 1 << 4,   ///< Intra Refresh in predefined 10% MB
  ET_FEC_HALF = 1 << 5,   ///< Forward Error Correction in 50% redundancy mode
  ET_FEC_FULL = 1 << 6,   ///< Forward Error Correction in 100% redundancy mode
  ET_RFS      = 1 << 7    ///< Reference Frame Selection
};
/**
 * @brief Description of one or more coded slices (a slice is a NAL)
 */
typedef struct SliceInformation {
  unsigned char* pBufferOfSlices;       ///< base buffer of coded slice(s)
  int            iCodedSliceCount;      ///< number of coded slices
  unsigned int*  pLengthOfSlices;       ///< array of slice lengths, one entry per slice
  int            iFecType;              ///< FEC type[0, 50%FEC, 100%FEC]
  unsigned char  uiSliceIdx;            ///< index of slice in frame [FMO: 0,..,uiSliceCount-1; No FMO: 0]
  unsigned char  uiSliceCount;          ///< count number of slice in frame [FMO: 2-8; No FMO: 1]
  char           iFrameIndex;           ///< index of frame[-1, .., idr_interval-1]
  unsigned char  uiNalRefIdc;           ///< NRI, priority level of slice(NAL)
  unsigned char  uiNalType;             ///< NAL type
  unsigned char  uiContainingFinalNal;  ///< whether final NAL is involved in buffer of coded slices, flag used in Pause feature in T27
} SliceInfo;
/// Pointer alias for ::SliceInfo
typedef SliceInfo* PSliceInfo;
/**
 * @brief Initial, maximal and minimal rate thresholds
 */
typedef struct {
  int iWidth;                  ///< frame width
  int iHeight;                 ///< frame height
  int iThresholdOfInitRate;    ///< threshold of initial rate
  int iThresholdOfMaxRate;     ///< threshold of maximal rate
  int iThresholdOfMinRate;     ///< threshold of minimal rate
  int iMinThresholdFrameRate;  ///< min frame rate min
  int iSkipFrameRate;          ///< skip to frame rate min
  int iSkipFrameStep;          ///< how many frames to skip
} SRateThresholds;
/// Pointer alias for ::SRateThresholds
typedef SRateThresholds* PRateThresholds;
/**
 * @brief Structure for decoder memory
 */
typedef struct TagSysMemBuffer {
  int iWidth;       ///< width of decoded pic for display
  int iHeight;      ///< height of decoded pic for display
  int iFormat;      ///< type is "EVideoFormatType"
  int iStride[2];   ///< stride of 2 component
} SSysMEMBuffer;
/**
 * @brief Output buffer information
 */
typedef struct TagBufferInfo {
  int                iBufferStatus;      ///< 0: one frame data is not ready; 1: one frame data is ready
  unsigned long long uiInBsTimeStamp;    ///< input BS timestamp
  unsigned long long uiOutYuvTimeStamp;  ///< output YUV timestamp, when bufferstatus is 1
  union {
    SSysMEMBuffer sSystemBuffer;         ///< memory info for one picture
  } UsrData;                             ///< output buffer info
  unsigned char*     pDst[3];            ///< pointers to the picture YUV data
} SBufferInfo;
/**
 * @brief In a GOP, multiple of the key frame number, derived from
 *        the number of layers (used as the index into the array below)
 */
static const char kiKeyNumMultiple[] = { 1, 1, 2, 4, 8, 16 };
#endif//WELS_VIDEO_CODEC_DEFINITION_H__

View File

@ -0,0 +1,15 @@
//The current file is auto-generated by script: generate_codec_ver.sh
// NOTE(review): being generated, manual edits here are presumably overwritten
// when that script is re-run - change the generator rather than this file.
#ifndef CODEC_VER_H
#define CODEC_VER_H
#include "codec_app_def.h"
// Version as a struct; the four initialisers are the same numbers as the
// OPENH264_MAJOR / MINOR / REVISION / RESERVED macros below, in that order.
static const OpenH264Version g_stCodecVersion = {2, 1, 0, 2002};
// The same version rendered as a human-readable string.
static const char* const g_strCodecVer = "OpenH264 version:2.1.0.2002";
// Individual version components for preprocessor-time checks.
#define OPENH264_MAJOR (2)
#define OPENH264_MINOR (1)
#define OPENH264_REVISION (0)
#define OPENH264_RESERVED (2002)
#endif // CODEC_VER_H

View File

@ -0,0 +1,83 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
// Function prologue/epilogue macros shared by the 32-bit ARM NEON sources.
// Everything in this file is assembled only when HAVE_NEON is defined.
#ifdef HAVE_NEON
.syntax unified
#ifdef __APPLE__
// Apple variant: the macro argument is referenced positionally as $0 and the
// exported symbol is given a leading underscore.
.text
// Emit a 4-byte aligned, ARM-mode, global label for the function named by $0.
.macro WELS_ASM_FUNC_BEGIN
.align 2
.arm
.globl _$0
_$0:
.endm
// Return to the caller by copying lr into pc.
.macro WELS_ASM_FUNC_END
mov pc, lr
.endm
#else
// Non-Apple variant: uses the named macro argument \funcName.
#ifdef __ELF__
.section .note.GNU-stack,"",%progbits // Mark stack as non-executable
#endif
.text
#ifdef __ELF__
.arch armv7-a
.fpu neon
#endif
// Emit a 4-byte aligned, ARM-mode, global label \funcName.  On ELF targets the
// symbol is typed as a function; .func/.endfunc are emitted only when the
// compiler driving the assembler is not clang.
.macro WELS_ASM_FUNC_BEGIN funcName
.align 2
.arm
.global \funcName
#ifdef __ELF__
.type \funcName, %function
#endif
#ifndef __clang__
.func \funcName
#endif
\funcName:
.endm
// Return to the caller by copying lr into pc, then close the .func region
// opened by WELS_ASM_FUNC_BEGIN (non-clang only).
.macro WELS_ASM_FUNC_END
mov pc, lr
#ifndef __clang__
.endfunc
#endif
.endm
#endif
#endif

View File

@ -0,0 +1,161 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifdef HAVE_NEON
#include "arm_arch_common_macro.S"
// Load four rows into \arg0..\arg3 from the address held in \arg4, advancing
// \arg4 by the stride register \arg5 after every row.  The ":128" qualifier
// tells the CPU the address is 128-bit aligned for each load.
.macro LOAD_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
// { // output: \arg0~\arg3; input: src* (\arg4), src_stride (\arg5)
vld1.64 {\arg0}, [\arg4,:128], \arg5
vld1.64 {\arg1}, [\arg4,:128], \arg5
vld1.64 {\arg2}, [\arg4,:128], \arg5
vld1.64 {\arg3}, [\arg4,:128], \arg5
// }
.endm
// Store the four rows \arg0..\arg3 to the address held in \arg4, advancing
// \arg4 by the stride register \arg5 after every row.  The ":128" qualifier
// tells the CPU the address is 128-bit aligned for each store.
.macro STORE_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
// { // input: \arg0~\arg3, dst* (\arg4), dst_stride (\arg5)
vst1.64 {\arg0}, [\arg4,:128], \arg5
vst1.64 {\arg1}, [\arg4,:128], \arg5
vst1.64 {\arg2}, [\arg4,:128], \arg5
vst1.64 {\arg3}, [\arg4,:128], \arg5
// }
.endm
// Same as LOAD_ALIGNED_DATA_WITH_STRIDE but without an alignment qualifier on
// the address: load four rows into \arg0..\arg3 from \arg4, post-incrementing
// \arg4 by \arg5 each time.
.macro LOAD_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
// { // output: \arg0~\arg3; input: src* (\arg4), src_stride (\arg5)
vld1.64 {\arg0}, [\arg4], \arg5
vld1.64 {\arg1}, [\arg4], \arg5
vld1.64 {\arg2}, [\arg4], \arg5
vld1.64 {\arg3}, [\arg4], \arg5
// }
.endm
// Same as STORE_ALIGNED_DATA_WITH_STRIDE but without an alignment qualifier on
// the address: store \arg0..\arg3 to \arg4, post-incrementing \arg4 by \arg5
// each time.
.macro STORE_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
// { // input: \arg0~\arg3, dst* (\arg4), dst_stride (\arg5)
vst1.64 {\arg0}, [\arg4], \arg5
vst1.64 {\arg1}, [\arg4], \arg5
vst1.64 {\arg2}, [\arg4], \arg5
vst1.64 {\arg3}, [\arg4], \arg5
// }
.endm
// WelsCopy8x8_neon: copy 8 rows of 8 bytes.
// As used below: r0 = destination, r1 = destination stride,
//                r2 = source,      r3 = source stride.
// No alignment is assumed on either pointer.  r0 and r2 are advanced.
WELS_ASM_FUNC_BEGIN WelsCopy8x8_neon
// rows 0..3
LOAD_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r2, r3
STORE_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r0, r1
// rows 4..7
LOAD_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r2, r3
STORE_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r0, r1
WELS_ASM_FUNC_END
// WelsCopy16x16_neon: copy 16 rows of 16 bytes.
// As used below: r0 = destination, r1 = destination stride,
//                r2 = source,      r3 = source stride.
// Both the loads and the stores use the 128-bit aligned macros, so every row
// address of source and destination must be 16-byte aligned.
WELS_ASM_FUNC_BEGIN WelsCopy16x16_neon
// rows 0..3
LOAD_ALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
STORE_ALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
// rows 4..7
LOAD_ALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
STORE_ALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
// rows 8..11
LOAD_ALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
STORE_ALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
// rows 12..15
LOAD_ALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
STORE_ALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
WELS_ASM_FUNC_END
// WelsCopy16x16NotAligned_neon: copy 16 rows of 16 bytes with no alignment
// requirement on either pointer.
// As used below: r0 = destination, r1 = destination stride,
//                r2 = source,      r3 = source stride.
WELS_ASM_FUNC_BEGIN WelsCopy16x16NotAligned_neon
// rows 0..3
LOAD_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
STORE_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
// rows 4..7
LOAD_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
STORE_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
// rows 8..11
LOAD_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
STORE_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
// rows 12..15
LOAD_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
STORE_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
WELS_ASM_FUNC_END
// WelsCopy16x8NotAligned_neon: copy 8 rows of 16 bytes with no alignment
// requirement on either pointer.
// As used below: r0 = destination, r1 = destination stride,
//                r2 = source,      r3 = source stride.
WELS_ASM_FUNC_BEGIN WelsCopy16x8NotAligned_neon
// rows 0..3
LOAD_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
STORE_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
// rows 4..7
LOAD_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
STORE_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
WELS_ASM_FUNC_END
// WelsCopy8x16_neon: copy 16 rows of 8 bytes with no alignment requirement
// on either pointer.
// As used below: r0 = destination, r1 = destination stride,
//                r2 = source,      r3 = source stride.
WELS_ASM_FUNC_BEGIN WelsCopy8x16_neon
// rows 0..3
LOAD_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r2, r3
STORE_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r0, r1
// rows 4..7
LOAD_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r2, r3
STORE_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r0, r1
// rows 8..11
LOAD_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r2, r3
STORE_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r0, r1
// rows 12..15
LOAD_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r2, r3
STORE_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r0, r1
WELS_ASM_FUNC_END
#endif

View File

@ -0,0 +1,857 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifdef HAVE_NEON
#include "arm_arch_common_macro.S"
// OR \arg0 with \arg1 into \arg2, move the 64-bit result into r3:r2, OR the two
// halves together and compare with zero.  Despite the name no branch is
// emitted: the macro only leaves the condition flags set (Z set when every bit
// was zero), so the caller has to branch itself.  Clobbers r2, r3 and \arg2.
.macro JMP_IF_128BITS_IS_ZERO arg0, arg1, arg2
vorr.s16 \arg2, \arg0, \arg1
vmov r3, r2, \arg2
orr r3, r3, r2
cmp r3, #0
.endm
// Build the per-byte filter-enable mask:
//   \arg6 = (|\arg1 - \arg2| < \arg4) & (|\arg0 - \arg1| < \arg5) & (|\arg3 - \arg2| < \arg5)
// Each byte of \arg6 is 0xFF where all three tests hold, 0x00 otherwise.
// The callers in this file pass the four pixel vectors nearest the edge in
// \arg0..\arg3 and two broadcast thresholds in \arg4 and \arg5.
// \arg4 is overwritten (it is reused as scratch after the first compare).
.macro MASK_MATRIX arg0, arg1, arg2, arg3, arg4, arg5, arg6
vabd.u8 \arg6, \arg1, \arg2
vcgt.u8 \arg6, \arg4, \arg6
vabd.u8 \arg4, \arg0, \arg1
vclt.u8 \arg4, \arg4, \arg5
vand.u8 \arg6, \arg6, \arg4
vabd.u8 \arg4, \arg3, \arg2
vclt.u8 \arg4, \arg4, \arg5
vand.u8 \arg6, \arg6, \arg4
.endm
// Compute the filtered second-from-edge pixel for the Lt4 luma filter.
//   avg   = (\arg0 + ((\arg2 + \arg3 + 1) >> 1)) >> 1
//   delta = clamp(avg - \arg1, \arg5, \arg6)       (signed, after biasing by 128)
//   cond  = |\arg0 - \arg2| < \arg4
//   \arg8 = \arg1 + (delta & cond & \arg7)
//   \arg9 = 1 in every byte where cond holds, else 0
// Inputs: \arg0..\arg3 pixel vectors, \arg4 threshold, \arg5/\arg6 lower/upper
// clamp bounds, \arg7 enable mask.  Outputs: \arg8 and \arg9 (both overwritten).
.macro DIFF_LUMA_LT4_P1_Q1 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9
vmov.i8 \arg9, #128
vrhadd.u8 \arg8, \arg2, \arg3
vhadd.u8 \arg8, \arg0, \arg8
// bias both operands by 128 so the difference can be taken as signed bytes
vsub.s8 \arg8, \arg8, \arg9
vsub.s8 \arg9, \arg1, \arg9
vqsub.s8 \arg8, \arg8, \arg9
vmax.s8 \arg8, \arg8, \arg5
vmin.s8 \arg8, \arg8, \arg6
vabd.u8 \arg9, \arg0, \arg2
vclt.u8 \arg9, \arg9, \arg4
vand.s8 \arg8, \arg8, \arg9
vand.s8 \arg8, \arg8, \arg7
vadd.u8 \arg8, \arg1, \arg8
// turn the 0xFF (-1) compare mask into the value 1
vabs.s8 \arg9, \arg9
.endm
// Compute the unclipped edge delta for eight pixels:
//   \arg4 = saturate_s8( ((\arg0 - \arg3) + 4 * (\arg2 - \arg1) + 4) >> 3 )
// \arg0..\arg3 are 64-bit pixel vectors, \arg4 is the 64-bit result, and
// \arg5/\arg6 are 128-bit scratch registers holding the widened 16-bit terms.
.macro DIFF_LUMA_LT4_P0_Q0 arg0, arg1, arg2, arg3, arg4, arg5, arg6
vsubl.u8 \arg5, \arg0, \arg3
vsubl.u8 \arg6, \arg2, \arg1
vshl.s16 \arg6, \arg6, #2
vadd.s16 \arg5, \arg5, \arg6
vqrshrn.s16 \arg4, \arg5, #3
.endm
// Strong (Eq4) luma filter for one side of the edge, eight pixels at a time.
// With a0..a5 = \arg0..\arg5 on entry:
//   \arg0 = (a1 + a2 + a3 + a4 + 2) >> 2
//   \arg7 = (2*a0 + 3*a1 + a2 + a3 + a4 + 4) >> 3
//   \arg6 = select(\arg6, (a1 + 2*a2 + 2*a3 + 2*a4 + a5 + 4) >> 3,
//                         (2*a2 + a3 + a5 + 2) >> 2)
// i.e. \arg6 is a bit mask on entry and holds the selected result on exit.
// Uses q4 and q5 (d8..d11) as scratch; \arg0, \arg6 and \arg7 are overwritten.
.macro DIFF_LUMA_EQ4_P2P1P0 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7
vaddl.u8 q4, \arg1, \arg2
vaddl.u8 q5, \arg3, \arg4
vadd.u16 q5, q4, q5
vaddl.u8 q4, \arg0, \arg1
vshl.u16 q4, q4, #1
vadd.u16 q4, q5, q4
vrshrn.u16 \arg0, q5, #2
vrshrn.u16 \arg7, q4, #3
vshl.u16 q5, q5, #1
vsubl.u8 q4, \arg5, \arg1
vadd.u16 q5, q4,q5
vaddl.u8 q4, \arg2, \arg5
vaddw.u8 q4, q4, \arg2
vaddw.u8 q4, q4, \arg3
vrshrn.u16 d10,q5, #3
vrshrn.u16 d8, q4, #2
vbsl.u8 \arg6, d10, d8
.endm
// Bitwise select without destroying the mask:
//   \arg3 = (\arg2 & \arg0) | (~\arg2 & \arg1)
// \arg2 is the mask (left untouched), \arg3 receives the result.
.macro DIFF_LUMA_EQ4_MASK arg0, arg1, arg2, arg3
vmov \arg3, \arg2
vbsl.u8 \arg3, \arg0, \arg1
.endm
// Strong (Eq4) chroma filter for eight pixels.  With a0..a3 = \arg0..\arg3:
//   \arg7 = (2*a0 + a1 + a3 + 2) >> 2
//   \arg8 = (a0 + a2 + 2*a3 + 2) >> 2
// \arg4, \arg5 and \arg6 are 128-bit scratch registers for the 16-bit sums.
// \arg8 may alias \arg0 (as one caller does): \arg0 is last read before
// \arg8 is written.
.macro DIFF_CHROMA_EQ4_P0Q0 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8
vaddl.u8 \arg4, \arg0, \arg3
vaddw.u8 \arg5, \arg4, \arg1
vaddw.u8 \arg6, \arg4, \arg2
vaddw.u8 \arg5, \arg5, \arg0
vaddw.u8 \arg6, \arg6, \arg3
vrshrn.u16 \arg7, \arg5, #2
vrshrn.u16 \arg8, \arg6, #2
.endm
// Load one row of 4 consecutive bytes from [r0] into lane \arg8 of
// \arg0..\arg3 and another 4 bytes from [r1] into lane \arg8 of \arg4..\arg7
// (de-interleaving load), then advance r0 and r1 by r2.
// The registers r0, r1 and r2 are hard-coded.
.macro LOAD_CHROMA_DATA_4 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8
vld4.u8 {\arg0[\arg8],\arg1[\arg8],\arg2[\arg8],\arg3[\arg8]}, [r0], r2
vld4.u8 {\arg4[\arg8],\arg5[\arg8],\arg6[\arg8],\arg7[\arg8]}, [r1], r2
.endm
// Inverse of LOAD_CHROMA_DATA_4: store lane \arg8 of \arg0..\arg3 as 4
// consecutive bytes at [r0] and lane \arg8 of \arg4..\arg7 at [r1], then
// advance r0 and r1 by r2.  The registers r0, r1 and r2 are hard-coded.
.macro STORE_CHROMA_DATA_4 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8
vst4.u8 {\arg0[\arg8],\arg1[\arg8],\arg2[\arg8],\arg3[\arg8]}, [r0], r2
vst4.u8 {\arg4[\arg8],\arg5[\arg8],\arg6[\arg8],\arg7[\arg8]}, [r1], r2
.endm
// Load one row of 3 consecutive bytes from [r2] into lane \arg6 of
// \arg0..\arg2 and 3 bytes from [r0] into lane \arg6 of \arg3..\arg5
// (de-interleaving load), then advance r2 and r0 by r1.
// The registers r0, r1 and r2 are hard-coded.
.macro LOAD_LUMA_DATA_3 arg0, arg1, arg2, arg3, arg4, arg5, arg6
vld3.u8 {\arg0[\arg6],\arg1[\arg6],\arg2[\arg6]}, [r2], r1
vld3.u8 {\arg3[\arg6],\arg4[\arg6],\arg5[\arg6]}, [r0], r1
.endm
// Store two rows of 4 bytes taken from the same four registers:
// lane \arg4 of \arg0..\arg3 goes to [r0], lane \arg5 goes to [r2];
// both pointers are then advanced by r1.
// The registers r0, r1 and r2 are hard-coded.
.macro STORE_LUMA_DATA_4 arg0, arg1, arg2, arg3, arg4, arg5
vst4.u8 {\arg0[\arg4],\arg1[\arg4],\arg2[\arg4],\arg3[\arg4]}, [r0], r1
vst4.u8 {\arg0[\arg5],\arg1[\arg5],\arg2[\arg5],\arg3[\arg5]}, [r2], r1
.endm
// Store one row as two groups of 3 bytes: lane \arg6 of \arg0..\arg2 goes to
// [r3] and lane \arg6 of \arg3..\arg5 goes to [r0]; both pointers are then
// advanced by r1.  The registers r0, r1 and r3 are hard-coded.
.macro STORE_LUMA_DATA_3 arg0, arg1, arg2, arg3, arg4, arg5, arg6
vst3.u8 {\arg0[\arg6],\arg1[\arg6],\arg2[\arg6]}, [r3], r1
vst3.u8 {\arg3[\arg6],\arg4[\arg6],\arg5[\arg6]}, [r0], r1
.endm
// Split a signed byte delta into two non-negative parts so it can be applied
// with unsigned saturating add/sub:
//   \arg1 = \arg0 where \arg0 >= 0, else 0        (positive part)
//   \arg0 = -\arg0 where \arg0 <  0, else 0       (magnitude of negative part)
.macro EXTRACT_DELTA_INTO_TWO_PART arg0, arg1
vcge.s8 \arg1, \arg0, #0
vand \arg1, \arg0, \arg1
vsub.s8 \arg0, \arg1, \arg0
.endm
// DeblockLumaLt4V_neon: tc-clipped (Lt4) luma deblocking of 16 pixels across
// the edge between the row just above r0 and the row at r0.
// Register use as read from the code:
//   r0 = address of the first row after the edge, r1 = row stride in bytes,
//   r2 = threshold broadcast to q11, r3 = threshold broadcast to q9,
//   [sp, #64] (after the 64-byte vpush) = pointer to the signed tc bytes.
// NOTE(review): r2/r3 look like the H.264 alpha/beta thresholds - confirm
// against the C prototype.
// Rows -2, -1, 0 and +1 relative to r0 are rewritten.
WELS_ASM_FUNC_BEGIN DeblockLumaLt4V_neon
vpush {q4-q7}
vdup.u8 q11, r2
vdup.u8 q9, r3
// r2 = r0 - 3 * stride: rows -3..-1 are read through r2, rows 0..+2 through r0
add r2, r1, r1, lsl #1
sub r2, r0, r2
vld1.u8 {q0}, [r2], r1
vld1.u8 {q3}, [r0], r1
vld1.u8 {q1}, [r2], r1
vld1.u8 {q4}, [r0], r1
vld1.u8 {q2}, [r2]
vld1.u8 {q5}, [r0]
// step r2 back to row -2, the first row that will be stored
sub r2, r2, r1
// Load the tc bytes (8 bytes are read, only lanes 0..3 are used) and expand
// them so that q14 holds each of the four values replicated over 4 bytes.
ldr r3, [sp, #64]
vld1.s8 {d31}, [r3]
vdup.s8 d28, d31[0]
vdup.s8 d30, d31[1]
vdup.s8 d29, d31[2]
vdup.s8 d31, d31[3]
vtrn.32 d28, d30
vtrn.32 d29, d31
// q10 = (tc >= 0) combined with the threshold mask from MASK_MATRIX
vcge.s8 q10, q14, #0
MASK_MATRIX q1, q2, q3, q4, q11, q9, q15
vand.u8 q10, q10, q15
// q15 = -tc
veor q15, q15
vsub.i8 q15,q15,q14
// filtered row -2 goes to q6 and is stored immediately; row +1 goes to q7
DIFF_LUMA_LT4_P1_Q1 q0, q1, q2, q3, q9, q15, q14, q10, q6, q12
vst1.u8 {q6}, [r2], r1
DIFF_LUMA_LT4_P1_Q1 q5, q4, q3, q2, q9, q15, q14, q10, q7, q13
// widen the clip range: tc += (0/1 flag from each side), then q15 = -tc
vabs.s8 q12, q12
vabs.s8 q13, q13
vadd.u8 q14,q14,q12
vadd.u8 q14,q14,q13
veor q15, q15
vsub.i8 q15,q15,q14
// edge delta for both 8-pixel halves, clamped to [-tc, tc] and masked
DIFF_LUMA_LT4_P0_Q0 d2, d4, d6, d8, d16, q12, q13
DIFF_LUMA_LT4_P0_Q0 d3, d5, d7, d9, d17, q12, q13
vmax.s8 q8, q8, q15
vmin.s8 q8, q8, q14
vand.s8 q8, q8, q10
EXTRACT_DELTA_INTO_TWO_PART q8, q9
// row -1 += delta, row 0 -= delta (unsigned saturating)
vqadd.u8 q2, q2, q9
vqsub.u8 q2, q2, q8
vst1.u8 {q2}, [r2], r1
vqsub.u8 q3, q3, q9
vqadd.u8 q3, q3, q8
vst1.u8 {q3}, [r2] , r1
vst1.u8 {q7}, [r2]
vpop {q4-q7}
WELS_ASM_FUNC_END
// DeblockLumaEq4V_neon: strong (Eq4) luma deblocking of 16 pixels across the
// edge between the row just above r0 and the row at r0.
// Register use as read from the code:
//   r0 = address of the first row after the edge, r1 = row stride in bytes,
//   r2 = threshold broadcast to q5, r3 = threshold broadcast to q4.
// NOTE(review): r2/r3 look like the H.264 alpha/beta thresholds - confirm
// against the C prototype.
// Rows -3..+2 relative to r0 are rewritten.
WELS_ASM_FUNC_BEGIN DeblockLumaEq4V_neon
vpush {q4-q7}
vdup.u8 q5, r2
vdup.u8 q4, r3
// rows -4..-1 are read through r3 (q8..q11), rows 0..+3 through r0 (q12..q15)
sub r3, r0, r1, lsl #2
vld1.u8 {q8}, [r3], r1
vld1.u8 {q12}, [r0], r1
vld1.u8 {q9}, [r3], r1
vld1.u8 {q13}, [r0], r1
vld1.u8 {q10}, [r3], r1
vld1.u8 {q14}, [r0], r1
vld1.u8 {q11}, [r3]
vld1.u8 {q15}, [r0]
// r3 was left on row -1; move it to row -3, the first row that is stored
sub r3, r3, r1 , lsl #1
// q6 = basic filter-enable mask
MASK_MATRIX q10, q11, q12, q13, q5, q4, q6
// q5 = (r2 >> 2) + 2 broadcast; q7 = |row-1 - row0| < q5
mov r2, r2, lsr #2
add r2, r2, #2
vdup.u8 q5, r2
vabd.u8 q0, q11, q12
vclt.u8 q7, q0, q5
// q1 / q2 = strong-filter condition for the upper / lower side
vabd.u8 q1, q9, q11
vclt.u8 q1, q1, q4
vand.s8 q1, q1, q7
vabd.u8 q2, q14,q12
vclt.u8 q2, q2, q4
vand.s8 q2, q2, q7
vand.u8 q7, q7, q6
vmov q3, q1
// upper side: compute candidates, then select per byte and store rows -3..-1
DIFF_LUMA_EQ4_P2P1P0 d16, d18, d20, d22, d24, d26, d2, d0
DIFF_LUMA_EQ4_P2P1P0 d17, d19, d21, d23, d25, d27, d3, d1
vand.u8 q3, q7, q3
DIFF_LUMA_EQ4_MASK q0, q9, q3, q4
vst1.u8 {q4}, [r3], r1
DIFF_LUMA_EQ4_MASK q8,q10, q3, q4
vst1.u8 {q4}, [r3], r1
DIFF_LUMA_EQ4_MASK q1,q11, q6, q4
vst1.u8 {q4}, [r3], r1
// lower side: same computation with the rows mirrored, storing rows 0..+2
vmov q0, q2
DIFF_LUMA_EQ4_P2P1P0 d30, d28, d26, d24, d22, d20, d4, d6
DIFF_LUMA_EQ4_P2P1P0 d31, d29, d27, d25, d23, d21, d5, d7
vand.u8 q0, q7, q0
DIFF_LUMA_EQ4_MASK q2, q12, q6, q4
vst1.u8 {q4}, [r3], r1
DIFF_LUMA_EQ4_MASK q15, q13, q0, q4
vst1.u8 {q4}, [r3], r1
DIFF_LUMA_EQ4_MASK q3, q14, q0, q4
vst1.u8 {q4}, [r3], r1
vpop {q4-q7}
WELS_ASM_FUNC_END
// DeblockLumaLt4H_neon: tc-clipped (Lt4) luma deblocking of 16 rows across the
// edge between the byte just before r0 and the byte at r0 in each row.
// Register use as read from the code:
//   r0 = address of the first pixel after the edge in row 0,
//   r1 = row stride in bytes,
//   r2 = threshold broadcast to q11, r3 = threshold broadcast to q9,
//   [sp, #64] (after the 64-byte vpush) = pointer to the signed tc bytes.
// NOTE(review): r2/r3 look like the H.264 alpha/beta thresholds - confirm
// against the C prototype.
// Four bytes per row, starting two bytes before the edge, are rewritten.
WELS_ASM_FUNC_BEGIN DeblockLumaLt4H_neon
vpush {q4-q7}
vdup.u8 q11, r2
vdup.u8 q9, r3
// r2 -> three bytes before the edge; each macro call loads one row
// (3 bytes before and 3 bytes after the edge) into one lane
sub r2, r0, #3
LOAD_LUMA_DATA_3 d0, d1, d2, d6, d7, d8, 0
LOAD_LUMA_DATA_3 d0, d1, d2, d6, d7, d8, 1
LOAD_LUMA_DATA_3 d0, d1, d2, d6, d7, d8, 2
LOAD_LUMA_DATA_3 d0, d1, d2, d6, d7, d8, 3
LOAD_LUMA_DATA_3 d0, d1, d2, d6, d7, d8, 4
LOAD_LUMA_DATA_3 d0, d1, d2, d6, d7, d8, 5
LOAD_LUMA_DATA_3 d0, d1, d2, d6, d7, d8, 6
LOAD_LUMA_DATA_3 d0, d1, d2, d6, d7, d8, 7
LOAD_LUMA_DATA_3 d3, d4, d5, d9, d10, d11, 0
LOAD_LUMA_DATA_3 d3, d4, d5, d9, d10, d11, 1
LOAD_LUMA_DATA_3 d3, d4, d5, d9, d10, d11, 2
LOAD_LUMA_DATA_3 d3, d4, d5, d9, d10, d11, 3
LOAD_LUMA_DATA_3 d3, d4, d5, d9, d10, d11, 4
LOAD_LUMA_DATA_3 d3, d4, d5, d9, d10, d11, 5
LOAD_LUMA_DATA_3 d3, d4, d5, d9, d10, d11, 6
LOAD_LUMA_DATA_3 d3, d4, d5, d9, d10, d11, 7
// regroup the d registers so that q0..q5 each hold one pixel column for
// all 16 rows
vswp d1, d2
vswp d3, d4
vswp d1, d4
vswp d7, d8
vswp d9, d10
vswp d7, d10
// rewind r0 by the 16 rows consumed by the loads
sub r0, r0, r1, lsl #4
// Load the tc bytes (8 bytes are read, only lanes 0..3 are used) and expand
// them so that q14 holds each of the four values replicated over 4 bytes.
ldr r3, [sp, #64]
vld1.s8 {d31}, [r3]
vdup.s8 d28, d31[0]
vdup.s8 d30, d31[1]
vdup.s8 d29, d31[2]
vdup.s8 d31, d31[3]
vtrn.32 d28, d30
vtrn.32 d29, d31
// q10 = (tc >= 0) combined with the threshold mask from MASK_MATRIX
vcge.s8 q10, q14, #0
MASK_MATRIX q1, q2, q3, q4, q11, q9, q15
vand.u8 q10, q10, q15
// q15 = -tc
veor q15, q15
vsub.i8 q15,q15,q14
// filtered second-from-edge columns go to q6 (before) and q7 (after)
DIFF_LUMA_LT4_P1_Q1 q0, q1, q2, q3, q9, q15, q14, q10, q6, q12
DIFF_LUMA_LT4_P1_Q1 q5, q4, q3, q2, q9, q15, q14, q10, q7, q13
// widen the clip range: tc += (0/1 flag from each side), then q15 = -tc
vabs.s8 q12, q12
vabs.s8 q13, q13
vadd.u8 q14,q14,q12
vadd.u8 q14,q14,q13
veor q15, q15
vsub.i8 q15,q15,q14
// edge delta for both 8-row halves, clamped to [-tc, tc] and masked
DIFF_LUMA_LT4_P0_Q0 d2, d4, d6, d8, d16, q12, q13
DIFF_LUMA_LT4_P0_Q0 d3, d5, d7, d9, d17, q12, q13
vmax.s8 q8, q8, q15
vmin.s8 q8, q8, q14
vand.s8 q8, q8, q10
EXTRACT_DELTA_INTO_TWO_PART q8, q9
vqadd.u8 q2, q2, q9
vqsub.u8 q2, q2, q8
vqsub.u8 q3, q3, q9
vqadd.u8 q3, q3, q8
// r0 -> two bytes before the edge on row 0, r2 -> same on row 1, and the
// stride is doubled so r0 walks the even rows and r2 the odd rows
sub r0, #2
add r2, r0, r1
lsl r1, #1
// arrange the four output columns for the interleaving 4-byte stores
vmov q1, q6
vmov q4, q7
vswp q2, q3
vswp d3, d6
vswp d5, d8
STORE_LUMA_DATA_4 d2, d3, d4, d5, 0, 1
STORE_LUMA_DATA_4 d2, d3, d4, d5, 2, 3
STORE_LUMA_DATA_4 d2, d3, d4, d5, 4, 5
STORE_LUMA_DATA_4 d2, d3, d4, d5, 6, 7
STORE_LUMA_DATA_4 d6, d7, d8, d9, 0, 1
STORE_LUMA_DATA_4 d6, d7, d8, d9, 2, 3
STORE_LUMA_DATA_4 d6, d7, d8, d9, 4, 5
STORE_LUMA_DATA_4 d6, d7, d8, d9, 6, 7
vpop {q4-q7}
WELS_ASM_FUNC_END
// DeblockLumaEq4H_neon: strong (Eq4) luma deblocking of 16 rows across the
// edge between the byte just before r0 and the byte at r0 in each row.
// Register use as read from the code:
//   r0 = address of the first pixel after the edge in row 0,
//   r1 = row stride in bytes,
//   r2 = threshold broadcast to q5, r3 = threshold broadcast to q4.
// NOTE(review): r2/r3 look like the H.264 alpha/beta thresholds - confirm
// against the C prototype.
// Six bytes per row (three on each side of the edge) are rewritten.
WELS_ASM_FUNC_BEGIN DeblockLumaEq4H_neon
vpush {q4-q7}
vdup.u8 q5, r2
vdup.u8 q4, r3
sub r3, r0, #4 // pix -= 4
// load 16 rows of 8 bytes (4 before and 4 after the edge)
vld1.u8 {d16}, [r3], r1
vld1.u8 {d17}, [r3], r1
vld1.u8 {d18}, [r3], r1
vld1.u8 {d19}, [r3], r1
vld1.u8 {d20}, [r3], r1
vld1.u8 {d21}, [r3], r1
vld1.u8 {d22}, [r3], r1
vld1.u8 {d23}, [r3], r1
vld1.u8 {d24}, [r3], r1
vld1.u8 {d25}, [r3], r1
vld1.u8 {d26}, [r3], r1
vld1.u8 {d27}, [r3], r1
vld1.u8 {d28}, [r3], r1
vld1.u8 {d29}, [r3], r1
vld1.u8 {d30}, [r3], r1
vld1.u8 {d31}, [r3], r1
// transpose the two 8x8 byte blocks (32-bit, then 16-bit, then 8-bit steps)
vtrn.u32 d16, d20
vtrn.u32 d17, d21
vtrn.u32 d18, d22
vtrn.u32 d19, d23
vtrn.u32 d24, d28
vtrn.u32 d25, d29
vtrn.u32 d26, d30
vtrn.u32 d27, d31
vtrn.u16 d16, d18
vtrn.u16 d17, d19
vtrn.u16 d20, d22
vtrn.u16 d21, d23
vtrn.u16 d24, d26
vtrn.u16 d25, d27
vtrn.u16 d28, d30
vtrn.u16 d29, d31
vtrn.u8 d16, d17
vtrn.u8 d18, d19
vtrn.u8 d20, d21
vtrn.u8 d22, d23
vtrn.u8 d24, d25
vtrn.u8 d26, d27
vtrn.u8 d28, d29
vtrn.u8 d30, d31
// regroup so that q8..q15 each hold one pixel column for all 16 rows, in
// the register layout the filter code below expects
vswp d17, d24
vswp d19, d26
vswp d21, d28
vswp d23, d30
vswp q12, q9
vswp q14, q11
vswp q12, q10
vswp q13, q11
// q6 = basic filter-enable mask
MASK_MATRIX q10, q11, q12, q13, q5, q4, q6
// q5 = (r2 >> 2) + 2 broadcast; q7 = |q11 - q12| < q5
mov r2, r2, lsr #2
add r2, r2, #2
vdup.u8 q5, r2
vabd.u8 q0, q11, q12
vclt.u8 q7, q0, q5
// q1 / q2 = strong-filter condition for the side before / after the edge
vabd.u8 q1, q9, q11
vclt.u8 q1, q1, q4
vand.s8 q1, q1, q7
vabd.u8 q2, q14,q12
vclt.u8 q2, q2, q4
vand.s8 q2, q2, q7
vand.u8 q7, q7, q6
vmov q3, q1
// side before the edge
DIFF_LUMA_EQ4_P2P1P0 d16, d18, d20, d22, d24, d26, d2, d0
DIFF_LUMA_EQ4_P2P1P0 d17, d19, d21, d23, d25, d27, d3, d1
vand.u8 q3, q7, q3
DIFF_LUMA_EQ4_MASK q0, q9, q3, q4
vmov q9, q4
vbsl.u8 q3, q8, q10
DIFF_LUMA_EQ4_MASK q1,q11, q6, q8
// side after the edge
vand.u8 q7, q7, q2
DIFF_LUMA_EQ4_P2P1P0 d30, d28, d26, d24, d22, d20, d4, d0
DIFF_LUMA_EQ4_P2P1P0 d31, d29, d27, d25, d23, d21, d5, d1
vbsl.u8 q6, q2, q12
DIFF_LUMA_EQ4_MASK q15, q13, q7, q4
vbsl.u8 q7, q0, q14
// gather the six output columns into q2..q7 and order the d registers for
// the interleaving 3-byte stores
vmov q5, q6
vmov q2, q9
vmov q6, q4
vmov q4, q8
vswp d8, d6
vswp d5, d7
vswp d5, d8
vswp d14, d12
vswp d11, d13
vswp d11, d14
// r3 -> three bytes before the edge; r0 still points at the edge itself
sub r3, r0, #3
STORE_LUMA_DATA_3 d4,d5,d6,d10,d11,d12,0
STORE_LUMA_DATA_3 d4,d5,d6,d10,d11,d12,1
STORE_LUMA_DATA_3 d4,d5,d6,d10,d11,d12,2
STORE_LUMA_DATA_3 d4,d5,d6,d10,d11,d12,3
STORE_LUMA_DATA_3 d4,d5,d6,d10,d11,d12,4
STORE_LUMA_DATA_3 d4,d5,d6,d10,d11,d12,5
STORE_LUMA_DATA_3 d4,d5,d6,d10,d11,d12,6
STORE_LUMA_DATA_3 d4,d5,d6,d10,d11,d12,7
STORE_LUMA_DATA_3 d7,d8,d9,d13,d14,d15,0
STORE_LUMA_DATA_3 d7,d8,d9,d13,d14,d15,1
STORE_LUMA_DATA_3 d7,d8,d9,d13,d14,d15,2
STORE_LUMA_DATA_3 d7,d8,d9,d13,d14,d15,3
STORE_LUMA_DATA_3 d7,d8,d9,d13,d14,d15,4
STORE_LUMA_DATA_3 d7,d8,d9,d13,d14,d15,5
STORE_LUMA_DATA_3 d7,d8,d9,d13,d14,d15,6
STORE_LUMA_DATA_3 d7,d8,d9,d13,d14,d15,7
vpop {q4-q7}
WELS_ASM_FUNC_END
// DeblockChromaLt4V_neon: tc-clipped (Lt4) chroma deblocking of 8 pixels in
// each of two planes, across the edge between the row just above the given
// pointer and the row at it.
// Register use as read from the code:
//   r0, r1 = addresses of the first row after the edge in the two planes,
//   r2 = row stride in bytes, r3 = threshold broadcast to q11,
//   [sp, #0] = threshold broadcast to q9, [sp, #4] = pointer to the tc bytes.
// NOTE(review): the two planes are presumably Cb (r0) and Cr (r1), and the
// thresholds alpha/beta - confirm against the C prototype.
// Rows -1 and 0 of both planes are rewritten.
WELS_ASM_FUNC_BEGIN DeblockChromaLt4V_neon
vdup.u8 q11, r3
ldr r3, [sp, #0]
// step both pointers back two rows
sub r0, r0, r2 , lsl #1
sub r1, r1, r2, lsl #1
vdup.u8 q9, r3
ldr r3, [sp, #4]
// rows -2..+1: the r0 plane goes to the low halves (d0,d2,d4,d6) and the r1
// plane to the high halves (d1,d3,d5,d7) of q0..q3
vld1.u8 {d0}, [r0], r2
vld1.u8 {d1}, [r1], r2
vld1.u8 {d2}, [r0], r2
vld1.u8 {d3}, [r1], r2
vld1.u8 {d4}, [r0], r2
vld1.u8 {d5}, [r1], r2
vld1.u8 {d6}, [r0]
vld1.u8 {d7}, [r1]
// rewind both pointers to row -1
sub r0, r0, r2, lsl #1
sub r1, r1, r2, lsl #1
// Expand the tc bytes: widen to 16 bit, then duplicate each value into two
// adjacent bytes, and copy the result to both halves of q14.
vld1.s8 {d31}, [r3]
vmovl.u8 q14,d31
vshl.u64 d29,d28,#8
vorr d28,d29
vmov d29, d28
// q15 = -tc
veor q15, q15
vsub.i8 q15,q15,q14
MASK_MATRIX q0, q1, q2, q3, q11, q9, q10
DIFF_LUMA_LT4_P0_Q0 d0, d2, d4, d6, d16, q12, q13
DIFF_LUMA_LT4_P0_Q0 d1, d3, d5, d7, d17, q12, q13
// clamp the delta to [-tc, tc], then apply the threshold mask and tc >= 0
vmax.s8 q8, q8, q15
vmin.s8 q8, q8, q14
vand.s8 q8, q8, q10
vcge.s8 q14, q14, #0
vand.s8 q8, q8, q14
EXTRACT_DELTA_INTO_TWO_PART q8, q10
// row -1 += delta, row 0 -= delta (unsigned saturating)
vqadd.u8 q1, q1, q10
vqsub.u8 q1, q1, q8
vst1.u8 {d2}, [r0], r2
vst1.u8 {d3}, [r1], r2
vqsub.u8 q2, q2, q10
vqadd.u8 q2, q2, q8
vst1.u8 {d4}, [r0]
vst1.u8 {d5}, [r1]
WELS_ASM_FUNC_END
// DeblockChromaEq4V_neon: strong (Eq4) chroma deblocking of 8 pixels in each
// of the Cb and Cr planes, across the edge between the row just above the
// given pointer and the row at it.
// Register use as read from the code:
//   r0 = Cb pointer, r1 = Cr pointer (first row after the edge),
//   r2 = row stride in bytes, r3 = threshold broadcast to q11,
//   [sp, #32] (after the 32-byte vpush) = threshold broadcast to q9.
// NOTE(review): the thresholds look like alpha/beta - confirm against the C
// prototype.
// Rows -1 and 0 of both planes are rewritten.
WELS_ASM_FUNC_BEGIN DeblockChromaEq4V_neon
vpush {q4-q5}
vdup.u8 q11, r3
ldr r3, [sp, #32]
// step both pointers back two rows
sub r0, r0, r2 , lsl #1
sub r1, r1, r2, lsl #1
vdup.u8 q9, r3
vld1.u8 {d0}, [r0], r2 // q0::p1
vld1.u8 {d1}, [r1], r2
vld1.u8 {d2}, [r0], r2 // q1::p0
vld1.u8 {d3}, [r1], r2
vld1.u8 {d4}, [r0], r2 // q2::q0
vld1.u8 {d5}, [r1], r2
vld1.u8 {d6}, [r0] // q3::q1
vld1.u8 {d7}, [r1]
sub r0, r0, r2, lsl #1 // pix = [-1*src_stride]
sub r1, r1, r2, lsl #1
// q10 = filter-enable mask; keep a second copy in q11 because vbsl
// overwrites its mask operand
MASK_MATRIX q0, q1, q2, q3, q11, q9, q10
vmov q11, q10
DIFF_CHROMA_EQ4_P0Q0 d0, d2, d4, d6, q4, q5, q8, d30, d0 // Cb::p0' q0'
DIFF_CHROMA_EQ4_P0Q0 d1, d3, d5, d7, q12, q13, q14, d31, d1 // Cr::p0' q0'
// select filtered or original pixels per byte and store rows -1 and 0
vbsl.u8 q10, q15, q1
vst1.u8 {d20}, [r0], r2
vst1.u8 {d21}, [r1], r2
vbsl.u8 q11, q0, q2
vst1.u8 {d22}, [r0]
vst1.u8 {d23}, [r1]
vpop {q4-q5}
WELS_ASM_FUNC_END
// DeblockChromaLt4H_neon: tc-clipped (Lt4) chroma deblocking of 8 rows in each
// of two planes, across the edge between the byte just before the given
// pointer and the byte at it.
// Register use as read from the code:
//   r0, r1 = addresses of the first pixel after the edge (row 0) in the two
//            planes, r2 = row stride in bytes,
//   r3 = threshold broadcast to q11,
//   [sp, #0] = threshold broadcast to q9, [sp, #4] = pointer to the tc bytes.
// NOTE(review): the two planes are presumably Cb (r0) and Cr (r1), and the
// thresholds alpha/beta - confirm against the C prototype.
// Four bytes per row (two on each side of the edge) are written back.
WELS_ASM_FUNC_BEGIN DeblockChromaLt4H_neon
vdup.u8 q11, r3
ldr r3, [sp, #0]
sub r0, r0, #2
vdup.u8 q9, r3
ldr r3, [sp, #4]
sub r1, r1, #2
vld1.s8 {d31}, [r3]
// each call loads one row (4 bytes per plane) into one lane
LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 0
LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 1
LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 2
LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 3
LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 4
LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 5
LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 6
LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 7
// regroup so that q0..q3 each hold one pixel column, r0 plane in the low
// half and r1 plane in the high half
vswp q1, q2
vswp d1, d2
vswp d6, d5
// Expand the tc bytes: widen to 16 bit, then duplicate each value into two
// adjacent bytes, and copy the result to both halves of q14.
vmovl.u8 q14, d31
vshl.u64 d29,d28,#8
vorr d28,d29
vmov d29, d28
// q15 = -tc
veor q15, q15
vsub.i8 q15,q15,q14
MASK_MATRIX q0, q1, q2, q3, q11, q9, q10
DIFF_LUMA_LT4_P0_Q0 d0, d2, d4, d6, d16, q12, q13
DIFF_LUMA_LT4_P0_Q0 d1, d3, d5, d7, d17, q12, q13
// clamp the delta to [-tc, tc], then apply the threshold mask and tc >= 0
vmax.s8 q8, q8, q15
vmin.s8 q8, q8, q14
vand.s8 q8, q8, q10
vcge.s8 q14, q14, #0
vand.s8 q8, q8, q14
EXTRACT_DELTA_INTO_TWO_PART q8, q10
vqadd.u8 q1, q1, q10
vqsub.u8 q1, q1, q8
vqsub.u8 q2, q2, q10
vqadd.u8 q2, q2, q8
// rewind both pointers by the 8 rows consumed by the loads
sub r0, r0, r2, lsl #3
sub r1, r1, r2, lsl #3
// undo the regrouping so the registers match the store macro layout
vswp d1, d2
vswp d6, d5
vswp q1, q2
STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 0
STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 1
STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 2
STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 3
STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 4
STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 5
STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 6
STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 7
WELS_ASM_FUNC_END
// DeblockChromaEq4H_neon: strong (Eq4) chroma deblocking of 8 rows in each of
// the Cb and Cr planes, across the edge between the byte just before the
// given pointer and the byte at it.
// Register use as read from the code:
//   r0 = Cb pointer, r1 = Cr pointer (first pixel after the edge, row 0),
//   r2 = row stride in bytes, r3 = threshold broadcast to q11,
//   [sp, #32] (after the 32-byte vpush) = threshold broadcast to q9.
// NOTE(review): the thresholds look like alpha/beta - confirm against the C
// prototype.
// Four bytes per row (two on each side of the edge) are written back.
WELS_ASM_FUNC_BEGIN DeblockChromaEq4H_neon
vpush {q4-q5}
vdup.u8 q11, r3
ldr r3, [sp, #32]
sub r0, r0, #2
sub r1, r1, #2
// each call loads one row (4 bytes per plane) into one lane
LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 0
LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 1
LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 2
LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 3
LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 4
LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 5
LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 6
LOAD_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 7
// regroup so that q0..q3 each hold one pixel column, Cb in the low half and
// Cr in the high half
vswp q1, q2
vswp d1, d2
vswp d6, d5
vdup.u8 q9, r3
// q10 = filter-enable mask; keep a second copy in q11 because vbsl
// overwrites its mask operand
MASK_MATRIX q0, q1, q2, q3, q11, q9, q10
vmov q11, q10
DIFF_CHROMA_EQ4_P0Q0 d0, d2, d4, d6, q8, q9, q12, d8, d10
DIFF_CHROMA_EQ4_P0Q0 d1, d3, d5, d7, q13, q14, q15, d9, d11
// select filtered (q4, q5) or original (q1, q2) pixels per byte
vbsl.u8 q10, q4, q1
vbsl.u8 q11, q5, q2
sub r0, r0, r2, lsl #3 // pix: 0th row [-2]
sub r1, r1, r2, lsl #3
vmov q1, q10
vmov q2, q11
// undo the regrouping so the registers match the store macro layout
vswp d1, d2
vswp d6, d5
vswp q1, q2
// Cb:d0d1d2d3, Cr:d4d5d6d7
STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 0
STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 1
STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 2
STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 3
STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 4
STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 5
STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 6
STORE_CHROMA_DATA_4 d0, d1, d2, d3, d4, d5, d6, d7, 7
vpop {q4-q5}
WELS_ASM_FUNC_END
// WelsNonZeroCount_neon: in-place clamp of the 24 bytes at [r0].
// Each byte, treated as signed, is replaced by min(byte, 1), so any value
// greater than 1 becomes 1.  Clobbers r1.
WELS_ASM_FUNC_BEGIN WelsNonZeroCount_neon
// q2 = sixteen bytes of value 1
mov r1, #1
vdup.8 q2, r1
vld1.64 {d0,d1,d2}, [r0]
vmin.s8 q0, q0, q2
vmin.s8 d2, d2, d4
vst1.64 {d0,d1,d2}, [r0]
WELS_ASM_FUNC_END
// Add each of the 16 bytes at [\arg0] to its neighbouring byte, once for the
// top direction (\arg3) and once for the left direction (\arg4).
//   \arg0 = pointer to the 16 bytes of the current block,
//   \arg1 = flags: bit 1 enables fetching the top neighbour data,
//                  bit 0 enables fetching the left neighbour data,
//   \arg2 = value scaled by 24 to step back to the top neighbour.
// NOTE(review): \arg0 is presumably the current macroblock's non-zero-count
// array and \arg2 the macroblock stride - confirm against the caller.
// Clobbers r6, r7, q0 and q1.  The labels are not macro-local, so the macro
// can be expanded only once per file.
.macro BS_NZC_CHECK arg0, arg1, arg2, arg3, arg4
vld1.8 {d0,d1}, [\arg0]
/* Arrange the input data --- TOP */
ands r6, \arg1, #2
beq bs_nzc_check_jump0
// r6 = \arg0 - 24 * \arg2 + 12: last 4 bytes of the neighbour's 16
sub r6, \arg0, \arg2, lsl #4
sub r6, r6, \arg2, lsl #3
add r6, #12
vld1.32 d3[1], [r6]
bs_nzc_check_jump0:
// q1 = [top 4 bytes of q1 | first 12 bytes of q0], i.e. q0 delayed by 4 bytes
vext.8 q1, q1, q0, #12
vadd.u8 \arg3, q0, q1
/* Arrange the input data --- LEFT */
ands r6, \arg1, #1
beq bs_nzc_check_jump1
// gather the bytes at offsets -21, -17, -13 and -9 from \arg0
sub r6, \arg0, #21
add r7, r6, #4
vld1.8 d3[4], [r6]
add r6, r7, #4
vld1.8 d3[5], [r7]
add r7, r6, #4
vld1.8 d3[6], [r6]
vld1.8 d3[7], [r7]
bs_nzc_check_jump1:
// two zips reorder the 16 bytes of q0 (4x4 transpose) before the same
// shifted add is applied
vzip.8 d0, d1
vzip.8 d0, d1
vext.8 q1, q1, q0, #12
vadd.u8 \arg4, q0, q1
.endm
// Compare neighbouring vectors of signed 16-bit values and report, per pair of
// adjacent 16-bit lanes, whether either lane differs by 4 or more.
// The four comparisons are \arg0 vs \arg1, \arg1 vs \arg2, \arg2 vs \arg3 and
// \arg3 vs \arg4; the 16 result bytes (0xFF = differs, 0x00 = not) are
// written to \arg5 (first two comparisons) and \arg6 (last two).
// \arg0 is overwritten with the broadcast constant 4 after its first use.
// Clobbers r6 and q8..q11.
.macro BS_COMPARE_MV arg0, arg1, arg2, arg3, arg4, arg5, arg6 //in: \arg0 (clobbered), \arg1..\arg4 (const); out: \arg5, \arg6
mov r6, #4
vabd.s16 q8, \arg0, \arg1
vabd.s16 q9, \arg1, \arg2
vdup.s16 \arg0, r6
vabd.s16 q10, \arg2, \arg3
vabd.s16 q11, \arg3, \arg4
vcge.s16 q8, \arg0
vcge.s16 q9, \arg0
vcge.s16 q10, \arg0
vcge.s16 q11, \arg0
// combine the masks of each pair of adjacent 16-bit lanes
vpadd.i16 d16, d16, d17
vpadd.i16 d17, d18, d19
vpadd.i16 d18, d20, d21
vpadd.i16 d19, d22, d23
// double and keep the high byte: 0xFF when at least one lane matched
vaddhn.i16 \arg5, q8, q8
vaddhn.i16 \arg6, q9, q9
.endm
// Run BS_COMPARE_MV over the 64 bytes at [\arg0] twice: once in the stored
// order with the top neighbour prepended (results in \arg3, \arg4) and once
// after a 32-bit transpose with the left neighbour prepended (results in
// \arg5, \arg6).
//   \arg0 = pointer to 64 bytes (loaded into q0..q3),
//   \arg1 = flags: bit 1 enables fetching the top neighbour data,
//                  bit 0 enables fetching the left neighbour data,
//   \arg2 = value scaled by 64 to step back to the top neighbour.
// NOTE(review): \arg0 is presumably the current macroblock's 16 motion
// vectors and \arg2 the macroblock stride - confirm against the caller.
// Clobbers r6, r7, q0..q4 and q8..q11.  The labels are not macro-local, so
// the macro can be expanded only once per file.
.macro BS_MV_CHECK arg0, arg1, arg2, arg3, arg4, arg5, arg6
vldm \arg0, {q0,q1,q2,q3}
/* Arrange the input data --- TOP */
ands r6, \arg1, #2
beq bs_mv_check_jump0
// r6 = \arg0 - 64 * \arg2 + 48: last 16 bytes of the neighbour's 64
sub r6, \arg0, \arg2, lsl #6
add r6, #48
vld1.8 {d8, d9}, [r6]
bs_mv_check_jump0:
BS_COMPARE_MV q4, q0, q1, q2, q3, \arg3, \arg4
/* Arrange the input data --- LEFT */
ands r6, \arg1, #1
beq bs_mv_check_jump1
// gather the 32-bit words at offsets -52, -36, -20 and -4 from \arg0
sub r6, \arg0, #52
add r7, r6, #16
vld1.32 d8[0], [r6]
add r6, r7, #16
vld1.32 d8[1], [r7]
add r7, r6, #16
vld1.32 d9[0], [r6]
vld1.32 d9[1], [r7]
bs_mv_check_jump1:
// reorder q0..q3 as a 4x4 transpose of 32-bit elements
vzip.32 q0, q2
vzip.32 q1, q3
vzip.32 q0, q1
vzip.32 q2, q3
BS_COMPARE_MV q4, q0, q1, q2, q3, \arg5, \arg6
.endm
// DeblockingBSCalcEnc_neon: compute 32 boundary-strength bytes and store them
// at the pointer passed on the stack.
// Register use as read from the code:
//   r0 = first argument of BS_NZC_CHECK (pointer to 16 bytes),
//   r1 = first argument of BS_MV_CHECK (pointer to 64 bytes),
//   r2 = neighbour availability flags (bit 1 = top, bit 0 = left),
//   r3 = scale value used to locate the top neighbour,
//   [sp, #28] (after pushing r5-r7 and q4 = 12 + 16 bytes) = output pointer.
// Each output byte is 2 when the nzc check fires, otherwise 1 when the mv
// check fires, otherwise 0.  The first 16 bytes written are the "left"
// direction results, the next 16 the "top" direction results.
WELS_ASM_FUNC_BEGIN DeblockingBSCalcEnc_neon
stmdb sp!, {r5-r7}
vpush {q4}
ldr r5, [sp, #28] //Save BS to r5
/* Checking the nzc status */
BS_NZC_CHECK r0, r2, r3, q14, q15 //q14,q15 save the nzc status
/* For checking bS[I] = 2 */
mov r6, #2
vcgt.s8 q14, q14, #0
vdup.u8 q0, r6
vcgt.s8 q15, q15, #0
vand.u8 q14, q14, q0 //q14 save the nzc check result all the time --- for dir is top
vand.u8 q15, q15, q0 //q15 save the nzc check result all the time --- for dir is left
/* Checking the mv status*/
BS_MV_CHECK r1, r2, r3, d24, d25, d26, d27//q12, q13 save the mv status
/* For checking bS[I] = 1 */
mov r6, #1
vdup.u8 q0, r6
vand.u8 q12, q12, q0 //q12 save the mv check result all the time --- for dir is top
vand.u8 q13, q13, q0 //q13 save the mv check result all the time --- for dir is left
/* Check bS[I] is '1' or '2' */
vmax.u8 q1, q12, q14
vmax.u8 q0, q13, q15
//vstm r5, {q0, q1}
vst1.32 {q0, q1}, [r5]
vpop {q4}
ldmia sp!, {r5-r7}
WELS_ASM_FUNC_END
#endif

View File

@ -0,0 +1,154 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifdef HAVE_NEON
#include "arm_arch_common_macro.S"
// ExpandPictureLuma_neon
// Pads a picture plane in place by replicating its border pixels: 32 bytes to
// the left and to the right of every row, then 32 rows above and below
// (the rows copied up and down already include the left and right padding).
// Register use as read from the code:
//   r0 = address of the first pixel of the first row
//   r1 = distance in bytes between two rows
//   r2 = row width in bytes
//   r3 = number of rows
// NOTE(review): the column loop stops only when its counter hits exactly zero,
// so r2 + 64 has to be a multiple of 16 - confirm that callers guarantee this.
WELS_ASM_FUNC_BEGIN ExpandPictureLuma_neon
stmdb sp!, {r4-r8}
//Save the dst
mov r7, r0
mov r8, r3
// r4 = address of the last pixel of the first row
add r4, r7, r2
sub r4, #1
//For the left and right expand
_expand_picture_luma_loop2:
// r5 = start of the left padding, r6 = start of the right padding of this row
sub r5, r7, #32
add r6, r4, #1
// broadcast the first and the last pixel of the row, then step to the next row
vld1.8 {d0[], d1[]}, [r7], r1
vld1.8 {d2[], d3[]}, [r4], r1
vst1.8 {q0}, [r5]!
vst1.8 {q0}, [r5]
vst1.8 {q1}, [r6]!
vst1.8 {q1}, [r6]
subs r8, #1
bne _expand_picture_luma_loop2
//for the top and bottom expand
// widen the working width by both paddings and start at the left padding
add r2, #64
sub r0, #32
// r4 = start (including left padding) of the last row
mla r4, r1, r3, r0
sub r4, r1
_expand_picture_luma_loop0:
// r5 = 32 rows above the first row, r6 = one row below the last row
mov r5, #32
mls r5, r5, r1, r0
add r6, r4, r1
// take 16 bytes of the first and of the last row
vld1.8 {q0}, [r0]!
vld1.8 {q1}, [r4]!
mov r8, #32
_expand_picture_luma_loop1:
// copy the 16-byte column block into 32 rows above and 32 rows below
vst1.8 {q0}, [r5], r1
vst1.8 {q1}, [r6], r1
subs r8, #1
bne _expand_picture_luma_loop1
subs r2, #16
bne _expand_picture_luma_loop0
//vldreq.32 d0, [r0]
ldmia sp!, {r4-r8}
WELS_ASM_FUNC_END
// ExpandPictureChroma_neon
// Pads a picture plane in place by replicating its border pixels: 16 bytes to
// the left and to the right of every row, then 16 rows above and below.
// The padded width is processed in 16-byte column blocks, a remainder of
// exactly 8 bytes is handled by an extra 8-byte pass. Any other remainder is
// not processed.
// Register use as read from the code:
//   r0 = address of the first pixel of the first row
//   r1 = distance in bytes between two rows
//   r2 = row width in bytes
//   r3 = number of rows
WELS_ASM_FUNC_BEGIN ExpandPictureChroma_neon
stmdb sp!, {r4-r9}
//Save the dst
mov r7, r0
mov r8, r3
// r4 = address of the last pixel of the first row
add r4, r7, r2
sub r4, #1
//For the left and right expand
_expand_picture_chroma_loop2:
// r5 = start of the left padding, r6 = start of the right padding of this row
sub r5, r7, #16
add r6, r4, #1
// broadcast the first and the last pixel of the row, then step to the next row
vld1.8 {d0[], d1[]}, [r7], r1
vld1.8 {d2[], d3[]}, [r4], r1
vst1.8 {q0}, [r5]
vst1.8 {q1}, [r6]
subs r8, #1
bne _expand_picture_chroma_loop2
//for the top and bottom expand
// r9 keeps the full padded width, r2 is rounded down to a multiple of 16
add r2, #32
mov r9, r2
bic r2, #15
sub r0, #16
// r4 = start (including left padding) of the last row
mla r4, r1, r3, r0
sub r4, r1
_expand_picture_chroma_loop0:
// r5 = 16 rows above the first row, r6 = one row below the last row
mov r5, #16
mls r5, r5, r1, r0
add r6, r4, r1
vld1.8 {q0}, [r0]!
vld1.8 {q1}, [r4]!
mov r8, #16
_expand_picture_chroma_loop1:
// copy the 16-byte column block into 16 rows above and 16 rows below
vst1.8 {q0}, [r5], r1
vst1.8 {q1}, [r6], r1
subs r8, #1
bne _expand_picture_chroma_loop1
subs r2, #16
bne _expand_picture_chroma_loop0
//vldreq.32 d0, [r0]
// remainder of the padded width after the 16-byte blocks
and r9, #15
cmp r9, #8
bne _expand_picture_chroma_end
// same copy as above for one trailing block of 8 bytes
mov r5, #16
mls r5, r5, r1, r0
add r6, r4, r1
vld1.8 {d0}, [r0]!
vld1.8 {d2}, [r4]!
mov r8, #16
_expand_picture_chroma_loop3:
vst1.8 {d0}, [r5], r1
vst1.8 {d2}, [r6], r1
subs r8, #1
bne _expand_picture_chroma_loop3
_expand_picture_chroma_end:
ldmia sp!, {r4-r9}
WELS_ASM_FUNC_END
#endif

View File

@ -0,0 +1,82 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifdef HAVE_NEON
#include "arm_arch_common_macro.S"
// WelsI16x16LumaPredV_neon
// Reads the 16 bytes located at r1 - r2 and writes them 16 times back to back
// starting at r0, filling 256 contiguous bytes.
// Register use as read from the code:
//   r0 = output buffer (16 rows of 16 bytes, no gap between rows)
//   r1 = address of the first pixel of the block in the reference picture
//   r2 = reference stride in bytes (only used to reach the row above r1)
WELS_ASM_FUNC_BEGIN WelsI16x16LumaPredV_neon
//Get the top line data to 'q0'
sub r3, r1, r2
vldm r3, {d0, d1}
//mov r2, #16
// four iterations of four stores give the 16 output rows
mov r3, #4
//Set the top line to the each line of MB(16*16)
loop_0_get_i16x16_luma_pred_v:
vst1.8 {d0,d1}, [r0]!
vst1.8 {d0,d1}, [r0]!
vst1.8 {d0,d1}, [r0]!
vst1.8 {d0,d1}, [r0]!
subs r3, #1
bne loop_0_get_i16x16_luma_pred_v
WELS_ASM_FUNC_END
// WelsI16x16LumaPredH_neon
// For each of 16 rows, reads the single byte just left of the row start
// (r1 - 1, advancing by r2 per row), replicates it 16 times and appends the
// resulting 16 bytes to the contiguous output buffer at r0.
// Register use as read from the code:
//   r0 = output buffer (16 rows of 16 bytes, no gap between rows)
//   r1 = address of the first pixel of the block in the reference picture
//   r2 = reference stride in bytes
WELS_ASM_FUNC_BEGIN WelsI16x16LumaPredH_neon
//stmdb sp!, {r4, lr}
// step back one byte so r1 addresses the left neighbour column
sub r1, r1, #1
// four iterations of four rows each
mov r3, #4
loop_0_get_i16x16_luma_pred_h:
//Get one byte data from left side
vld1.8 {d0[],d1[]}, [r1], r2
vld1.8 {d2[],d3[]}, [r1], r2
vld1.8 {d4[],d5[]}, [r1], r2
vld1.8 {d6[],d7[]}, [r1], r2
//Set the line of MB using the left side byte data
vst1.8 {d0,d1}, [r0]!
//add r0, #16
vst1.8 {d2,d3}, [r0]!
//add r0, #16
vst1.8 {d4,d5}, [r0]!
//add r0, #16
vst1.8 {d6,d7}, [r0]!
//add r0, #16
subs r3, #1
bne loop_0_get_i16x16_luma_pred_h
WELS_ASM_FUNC_END
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,76 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
// Function framing helpers for the AArch64 assembly files.
//   WELS_ASM_AARCH64_FUNC_BEGIN name : switches to an aligned, exported label for name
//   WELS_ASM_AARCH64_FUNC_END        : emits the ret that terminates the function
// Two variants exist because the Apple toolchain uses positional macro
// arguments and prefixes C symbols with an underscore.
#ifdef __APPLE__
.text
// Apple variant: the function name arrives as positional argument 0
.macro WELS_ASM_AARCH64_FUNC_BEGIN
.align 2
.globl _$0
_$0:
.endm
.macro WELS_ASM_AARCH64_FUNC_END
ret
.endm
#else
#ifdef __ELF__
.section .note.GNU-stack,"",%progbits // Mark stack as non-executable
#endif
.text
// Generic variant: named argument, ELF symbol type, and the .func/.endfunc
// debug directives unless the file is assembled by clang
.macro WELS_ASM_AARCH64_FUNC_BEGIN funcName
.align 2
.global \funcName
#ifdef __ELF__
.type \funcName, %function
#endif
#ifndef __clang__
.func \funcName
#endif
\funcName:
.endm
.macro WELS_ASM_AARCH64_FUNC_END
ret
#ifndef __clang__
.endfunc
#endif
.endm
#endif
// SIGN_EXTENSION arg0, arg1
// Sign-extends the 32-bit register arg1 into the 64-bit register arg0.
// The functions in these files apply it to int32_t stride arguments before
// using them in 64-bit address arithmetic.
.macro SIGN_EXTENSION arg0, arg1
sxtw \arg0, \arg1
.endm

View File

@ -0,0 +1,202 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifdef HAVE_NEON_AARCH64
#include "arm_arch64_common_macro.S"
// LOAD_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
// Loads four rows of 8 bytes. Row i is written to 64-bit lane 0 of vector
// arg<i>, the other lane of that vector is left unchanged.
// arg4 is the source pointer and is advanced by arg5 after every row.
.macro LOAD_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
// { // input: arg0~arg3, src*, src_stride
ld1 {\arg0\().d}[0], [\arg4], \arg5
ld1 {\arg1\().d}[0], [\arg4], \arg5
ld1 {\arg2\().d}[0], [\arg4], \arg5
ld1 {\arg3\().d}[0], [\arg4], \arg5
// }
.endm
// STORE_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
// Stores four rows of 8 bytes. Row i is taken from 64-bit lane 0 of vector
// arg<i>. arg4 is the destination pointer and is advanced by arg5 after
// every row.
.macro STORE_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
// { // input: arg0~arg3, dst*, dst_stride
st1 {\arg0\().d}[0], [\arg4], \arg5
st1 {\arg1\().d}[0], [\arg4], \arg5
st1 {\arg2\().d}[0], [\arg4], \arg5
st1 {\arg3\().d}[0], [\arg4], \arg5
// }
.endm
// LOAD_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
// Loads four rows of 8 bytes, row i into the 8-byte form of vector arg<i>.
// arg4 is the source pointer and is advanced by arg5 after every row.
.macro LOAD_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
// { // input: arg0~arg3, src*, src_stride
ld1 {\arg0\().8b}, [\arg4], \arg5
ld1 {\arg1\().8b}, [\arg4], \arg5
ld1 {\arg2\().8b}, [\arg4], \arg5
ld1 {\arg3\().8b}, [\arg4], \arg5
// }
.endm
// STORE_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
// Stores four rows of 8 bytes, row i from the 8-byte form of vector arg<i>.
// arg4 is the destination pointer and is advanced by arg5 after every row.
.macro STORE_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
// { // input: arg0~arg3, dst*, dst_stride
st1 {\arg0\().8b}, [\arg4], \arg5
st1 {\arg1\().8b}, [\arg4], \arg5
st1 {\arg2\().8b}, [\arg4], \arg5
st1 {\arg3\().8b}, [\arg4], \arg5
// }
.endm
// LOAD16_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
// Loads four rows of 16 bytes, row i into vector arg<i> using the 2d
// (two 64-bit elements) arrangement. arg4 is the source pointer and is
// advanced by arg5 after every row.
.macro LOAD16_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
// { // input: arg0~arg3, src*, src_stride
ld1 {\arg0\().2d}, [\arg4], \arg5
ld1 {\arg1\().2d}, [\arg4], \arg5
ld1 {\arg2\().2d}, [\arg4], \arg5
ld1 {\arg3\().2d}, [\arg4], \arg5
// }
.endm
// STORE16_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
// Stores four rows of 16 bytes, row i from vector arg<i> using the 2d
// (two 64-bit elements) arrangement. arg4 is the destination pointer and is
// advanced by arg5 after every row.
.macro STORE16_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
// { // input: arg0~arg3, dst*, dst_stride
st1 {\arg0\().2d}, [\arg4], \arg5
st1 {\arg1\().2d}, [\arg4], \arg5
st1 {\arg2\().2d}, [\arg4], \arg5
st1 {\arg3\().2d}, [\arg4], \arg5
// }
.endm
// LOAD16_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
// Loads four rows of 16 bytes, row i into vector arg<i> using the 16b
// (sixteen byte elements) arrangement. arg4 is the source pointer and is
// advanced by arg5 after every row.
.macro LOAD16_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
// { // input: arg0~arg3, src*, src_stride
ld1 {\arg0\().16b}, [\arg4], \arg5
ld1 {\arg1\().16b}, [\arg4], \arg5
ld1 {\arg2\().16b}, [\arg4], \arg5
ld1 {\arg3\().16b}, [\arg4], \arg5
// }
.endm
// STORE16_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
// Stores four rows of 16 bytes, row i from vector arg<i> using the 16b
// (sixteen byte elements) arrangement. arg4 is the destination pointer and
// is advanced by arg5 after every row.
.macro STORE16_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
// { // input: arg0~arg3, dst*, dst_stride
st1 {\arg0\().16b}, [\arg4], \arg5
st1 {\arg1\().16b}, [\arg4], \arg5
st1 {\arg2\().16b}, [\arg4], \arg5
st1 {\arg3\().16b}, [\arg4], \arg5
// }
.endm
//void WelsCopy8x8_AArch64_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
// Copies a block of 8 rows by 8 bytes from pSrc (x2, stride w3) to pDst
// (x0, stride w1). Both strides are sign-extended to 64 bits first.
// The block is moved in two groups of four rows. x0 and x2 are advanced past
// the block, v0-v7 are used as scratch.
WELS_ASM_AARCH64_FUNC_BEGIN WelsCopy8x8_AArch64_neon
SIGN_EXTENSION x1,w1
SIGN_EXTENSION x3,w3
LOAD_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x2, x3
STORE_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x0, x1
LOAD_UNALIGNED_DATA_WITH_STRIDE v4, v5, v6, v7, x2, x3
STORE_UNALIGNED_DATA_WITH_STRIDE v4, v5, v6, v7, x0, x1
WELS_ASM_AARCH64_FUNC_END
// WelsCopy16x16_AArch64_neon
// Copies a block of 16 rows by 16 bytes. Register use as read from the code:
//   x0 = destination pointer, w1 = destination stride (sign-extended)
//   x2 = source pointer,      w3 = source stride (sign-extended)
// The block is moved in four groups of four rows through the ALIGNED macro
// pair, alternating between v0-v3 and v16-v19 as scratch.
WELS_ASM_AARCH64_FUNC_BEGIN WelsCopy16x16_AArch64_neon
SIGN_EXTENSION x1,w1
SIGN_EXTENSION x3,w3
LOAD16_ALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x2, x3
STORE16_ALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x0, x1
LOAD16_ALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x2, x3
STORE16_ALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x0, x1
LOAD16_ALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x2, x3
STORE16_ALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x0, x1
LOAD16_ALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x2, x3
STORE16_ALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x0, x1
WELS_ASM_AARCH64_FUNC_END
// WelsCopy16x16NotAligned_AArch64_neon
// Copies a block of 16 rows by 16 bytes. Register use as read from the code:
//   x0 = destination pointer, w1 = destination stride (sign-extended)
//   x2 = source pointer,      w3 = source stride (sign-extended)
// The block is moved in four groups of four rows through the UNALIGNED macro
// pair, alternating between v0-v3 and v16-v19 as scratch.
WELS_ASM_AARCH64_FUNC_BEGIN WelsCopy16x16NotAligned_AArch64_neon
SIGN_EXTENSION x1,w1
SIGN_EXTENSION x3,w3
LOAD16_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x2, x3
STORE16_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x0, x1
LOAD16_UNALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x2, x3
STORE16_UNALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x0, x1
LOAD16_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x2, x3
STORE16_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x0, x1
LOAD16_UNALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x2, x3
STORE16_UNALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x0, x1
WELS_ASM_AARCH64_FUNC_END
// WelsCopy16x8NotAligned_AArch64_neon
// Copies a block of 8 rows by 16 bytes. Register use as read from the code:
//   x0 = destination pointer, w1 = destination stride (sign-extended)
//   x2 = source pointer,      w3 = source stride (sign-extended)
// The block is moved in two groups of four rows, v0-v3 and v16-v19 are scratch.
WELS_ASM_AARCH64_FUNC_BEGIN WelsCopy16x8NotAligned_AArch64_neon
SIGN_EXTENSION x1,w1
SIGN_EXTENSION x3,w3
LOAD16_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x2, x3
STORE16_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x0, x1
LOAD16_UNALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x2, x3
STORE16_UNALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x0, x1
WELS_ASM_AARCH64_FUNC_END
// WelsCopy8x16_AArch64_neon
// Copies a block of 16 rows by 8 bytes. Register use as read from the code:
//   x0 = destination pointer, w1 = destination stride (sign-extended)
//   x2 = source pointer,      w3 = source stride (sign-extended)
// The block is moved in four groups of four rows, alternating between v0-v3
// and v4-v7 as scratch.
WELS_ASM_AARCH64_FUNC_BEGIN WelsCopy8x16_AArch64_neon
SIGN_EXTENSION x1,w1
SIGN_EXTENSION x3,w3
LOAD_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x2, x3
STORE_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x0, x1
LOAD_UNALIGNED_DATA_WITH_STRIDE v4, v5, v6, v7, x2, x3
STORE_UNALIGNED_DATA_WITH_STRIDE v4, v5, v6, v7, x0, x1
LOAD_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x2, x3
STORE_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x0, x1
LOAD_UNALIGNED_DATA_WITH_STRIDE v4, v5, v6, v7, x2, x3
STORE_UNALIGNED_DATA_WITH_STRIDE v4, v5, v6, v7, x0, x1
WELS_ASM_AARCH64_FUNC_END
#endif

View File

@ -0,0 +1,852 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifdef HAVE_NEON_AARCH64
#include "arm_arch64_common_macro.S"
// MASK_MATRIX arg0, arg1, arg2, arg3, arg4, arg5, arg6
// Builds a per-byte mask in arg6 that is all ones where, using unsigned bytes,
//   |arg1 - arg2| < arg4  and  |arg0 - arg1| < arg5  and  |arg3 - arg2| < arg5
// and zero elsewhere. arg4 is overwritten (used as scratch after its first use).
// The callers in this file pass (p1, p0, q0, q1, alpha, beta, result).
.macro MASK_MATRIX arg0, arg1, arg2, arg3, arg4, arg5, arg6
uabd \arg6\().16b, \arg1\().16b, \arg2\().16b
cmhi \arg6\().16b, \arg4\().16b, \arg6\().16b
// from here on arg4 only serves as a temporary
uabd \arg4\().16b, \arg0\().16b, \arg1\().16b
cmhi \arg4\().16b, \arg5\().16b, \arg4\().16b
and \arg6\().16b, \arg6\().16b, \arg4\().16b
uabd \arg4\().16b, \arg3\().16b, \arg2\().16b
cmhi \arg4\().16b, \arg5\().16b, \arg4\().16b
and \arg6\().16b, \arg6\().16b, \arg4\().16b
.endm
// DIFF_LUMA_LT4_P1_Q1 arg0 .. arg9
// Computes the filtered value of the second pixel next to an edge.
// With the caller order (p2, p1, p0, q0, beta, -tc0, tc0, flag, out, out2):
//   delta = ((p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, clamped to [-tc0, tc0]
//   arg8  = p1 + delta where |p2 - p0| < beta and the flag byte is set,
//           p1 unchanged elsewhere
//   arg9  = 1 where |p2 - p0| < beta, 0 elsewhere
// arg0..arg7 are only read.
.macro DIFF_LUMA_LT4_P1_Q1 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9
//v0, v1, v2, v3, v17(beta), v18(-Tc0), v6(Tc0), v7(flag), v19, v20
urhadd \arg8\().16b, \arg2\().16b, \arg3\().16b
uhadd \arg8\().16b, \arg0\().16b, \arg8\().16b
// widen to 16 bits for the subtraction, then narrow with signed saturation
usubl \arg9\().8h, \arg8\().8b, \arg1\().8b
sqxtn \arg9\().8b, \arg9\().8h
usubl2 \arg8\().8h, \arg8\().16b, \arg1\().16b
sqxtn2 \arg9\().16b, \arg8\().8h
// clamp the delta between arg5 and arg6
smax \arg8\().16b, \arg9\().16b, \arg5\().16b
//
smin \arg8\().16b, \arg8\().16b, \arg6\().16b
uabd \arg9\().16b, \arg0\().16b, \arg2\().16b
cmhi \arg9\().16b, \arg4\().16b, \arg9\().16b
and \arg8\().16b, \arg8\().16b, \arg9\().16b
and \arg8\().16b, \arg8\().16b, \arg7\().16b
add \arg8\().16b, \arg1\().16b, \arg8\().16b
// turns the all-ones mask bytes (-1) into 1 so callers can add them
abs \arg9\().16b, \arg9\().16b
.endm
// DIFF_LUMA_LT4_P0_Q0_1 arg0, arg1, arg2, arg3, arg4, arg5, arg6
// Lower eight bytes: arg4 = saturate(((arg2 - arg1) * 4 + (arg0 - arg3) + 4) >> 3)
// as signed bytes. With the caller order (p1, p0, q0, q1) this is the edge
// delta before clamping. arg5 and arg6 are 16-bit scratch vectors.
.macro DIFF_LUMA_LT4_P0_Q0_1 arg0, arg1, arg2, arg3, arg4, arg5, arg6
usubl \arg5\().8h, \arg0\().8b, \arg3\().8b
usubl \arg6\().8h, \arg2\().8b, \arg1\().8b
shl \arg6\().8h, \arg6\().8h, #2
add \arg5\().8h, \arg5\().8h, \arg6\().8h
// rounding shift right by 3 with signed saturation to 8 bits
sqrshrn \arg4\().8b, \arg5\().8h, #3
.endm
// DIFF_LUMA_LT4_P0_Q0_2 arg0, arg1, arg2, arg3, arg4, arg5, arg6
// Upper eight bytes: arg4 = saturate(((arg2 - arg1) * 4 + (arg0 - arg3) + 4) >> 3)
// as signed bytes, written to the upper half of arg4 while its lower half is
// kept. arg5 and arg6 are 16-bit scratch vectors.
.macro DIFF_LUMA_LT4_P0_Q0_2 arg0, arg1, arg2, arg3, arg4, arg5, arg6
usubl2 \arg5\().8h, \arg0\().16b, \arg3\().16b
usubl2 \arg6\().8h, \arg2\().16b, \arg1\().16b
shl \arg6\().8h, \arg6\().8h, #2
add \arg5\().8h, \arg5\().8h, \arg6\().8h
sqrshrn2 \arg4\().16b, \arg5\().8h, #3
.endm
// EXTRACT_DELTA_INTO_TWO_PART arg0, arg1
// Splits the signed bytes in arg0 into two non-negative parts:
//   arg1 = arg0 where arg0 >= 0, else 0      (positive part)
//   arg0 = -arg0 where arg0 < 0, else 0      (magnitude of the negative part)
// This lets callers apply the delta with unsigned saturating add and subtract.
.macro EXTRACT_DELTA_INTO_TWO_PART arg0, arg1
cmge \arg1\().16b, \arg0\().16b, #0
and \arg1\().16b, \arg0\().16b, \arg1\().16b
sub \arg0\().16b, \arg1\().16b, \arg0\().16b
.endm
// DIFF_LUMA_EQ4_P2P1P0_1 arg0 .. arg9   (lower eight bytes)
// With the caller order (p3, p2, p1, p0, q0, q1, sel, out, tmp, tmp):
//   arg0 = (p2 + p1 + p0 + q0 + 2) >> 2                      candidate for p1
//   arg7 = (2*p3 + 3*p2 + p1 + p0 + q0 + 4) >> 3             candidate for p2
//   arg6 = sel ? (p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4) >> 3
//              : (2*p1 + p0 + q1 + 2) >> 2                   candidate for p0
// arg6 enters as the selection mask and leaves as the selected p0 candidate.
// All three results are written in the 8-byte register form. arg0 is
// overwritten, arg8 and arg9 are scratch.
.macro DIFF_LUMA_EQ4_P2P1P0_1 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9
uaddl \arg8\().8h, \arg1\().8b, \arg2\().8b
uaddl \arg9\().8h, \arg3\().8b, \arg4\().8b
// arg9 = arg1 + arg2 + arg3 + arg4
add \arg9\().8h, \arg9\().8h, \arg8\().8h
uaddl \arg8\().8h, \arg0\().8b, \arg1\().8b
shl \arg8\().8h, \arg8\().8h, #1
// arg8 = 2*arg0 + 3*arg1 + arg2 + arg3 + arg4
add \arg8\().8h, \arg9\().8h, \arg8\().8h
rshrn \arg0\().8b, \arg9\().8h, #2
rshrn \arg7\().8b, \arg8\().8h, #3
shl \arg9\().8h, \arg9\().8h, #1
usubl \arg8\().8h, \arg5\().8b, \arg1\().8b
// arg9 = arg1 + 2*arg2 + 2*arg3 + 2*arg4 + arg5
add \arg9\().8h, \arg8\().8h, \arg9\().8h
uaddl \arg8\().8h, \arg2\().8b, \arg5\().8b
uaddw \arg8\().8h, \arg8\().8h, \arg2\().8b
// arg8 = 2*arg2 + arg3 + arg5
uaddw \arg8\().8h, \arg8\().8h, \arg3\().8b
rshrn \arg9\().8b, \arg9\().8h, #3
rshrn \arg8\().8b, \arg8\().8h, #2
// keep arg9 where the mask in arg6 is set, arg8 elsewhere
bsl \arg6\().8b, \arg9\().8b, \arg8\().8b
.endm
// DIFF_LUMA_EQ4_P2P1P0_2 arg0 .. arg9   (upper eight bytes)
// Same arithmetic as the lower-half variant, applied to the upper eight bytes:
//   arg0 = (arg1 + arg2 + arg3 + arg4 + 2) >> 2
//   arg7 = (2*arg0_in + 3*arg1 + arg2 + arg3 + arg4 + 4) >> 3
//   arg6 = sel ? (arg1 + 2*arg2 + 2*arg3 + 2*arg4 + arg5 + 4) >> 3
//              : (2*arg2 + arg3 + arg5 + 2) >> 2
// The narrowing instructions write only the upper halves of arg0, arg7, arg9
// and arg8. The final bsl works on all sixteen bytes of arg6, but only its
// upper half is meaningful. arg8 and arg9 are scratch.
.macro DIFF_LUMA_EQ4_P2P1P0_2 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9
uaddl2 \arg8\().8h, \arg1\().16b, \arg2\().16b
uaddl2 \arg9\().8h, \arg3\().16b, \arg4\().16b
// arg9 = arg1 + arg2 + arg3 + arg4
add \arg9\().8h, \arg9\().8h, \arg8\().8h
uaddl2 \arg8\().8h, \arg0\().16b, \arg1\().16b
shl \arg8\().8h, \arg8\().8h, #1
// arg8 = 2*arg0 + 3*arg1 + arg2 + arg3 + arg4
add \arg8\().8h, \arg9\().8h, \arg8\().8h
rshrn2 \arg0\().16b, \arg9\().8h, #2
rshrn2 \arg7\().16b, \arg8\().8h, #3
shl \arg9\().8h, \arg9\().8h, #1
usubl2 \arg8\().8h, \arg5\().16b, \arg1\().16b
// arg9 = arg1 + 2*arg2 + 2*arg3 + 2*arg4 + arg5
add \arg9\().8h, \arg8\().8h, \arg9\().8h
uaddl2 \arg8\().8h, \arg2\().16b, \arg5\().16b
uaddw2 \arg8\().8h, \arg8\().8h, \arg2\().16b
// arg8 = 2*arg2 + arg3 + arg5
uaddw2 \arg8\().8h, \arg8\().8h, \arg3\().16b
rshrn2 \arg9\().16b, \arg9\().8h, #3
rshrn2 \arg8\().16b, \arg8\().8h, #2
// keep arg9 where the mask in arg6 is set, arg8 elsewhere
bsl \arg6\().16b, \arg9\().16b, \arg8\().16b
.endm
// DIFF_CHROMA_EQ4_P0Q0_1 arg0 .. arg7   (lower eight bytes)
//   arg6 = (2*arg0 + arg1 + arg3 + 2) >> 2
//   arg7 = (arg0 + arg2 + 2*arg3 + 2) >> 2
// arg4 and arg5 are 16-bit scratch vectors.
// NOTE(review): presumably called with (p1, p0, q0, q1) so that arg6 and arg7
// are the filtered p0 and q0 - the call sites are outside the lines shown here.
.macro DIFF_CHROMA_EQ4_P0Q0_1 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7
uaddl \arg4\().8h, \arg0\().8b, \arg3\().8b
// arg4 = 2*arg0 + 2*arg3, shared by both results
shl \arg4\().8h, \arg4\().8h, #1
usubl \arg5\().8h, \arg1\().8b, \arg3\().8b
add \arg5\().8h, \arg5\().8h, \arg4\().8h
rshrn \arg6\().8b, \arg5\().8h, #2
usubl \arg5\().8h, \arg2\().8b, \arg0\().8b
add \arg5\().8h, \arg5\().8h, \arg4\().8h
rshrn \arg7\().8b, \arg5\().8h, #2
.endm
// DIFF_CHROMA_EQ4_P0Q0_2 arg0 .. arg7   (upper eight bytes)
//   arg6 = (2*arg0 + arg1 + arg3 + 2) >> 2
//   arg7 = (arg0 + arg2 + 2*arg3 + 2) >> 2
// Results go to the upper halves of arg6 and arg7, their lower halves are
// kept. arg4 and arg5 are 16-bit scratch vectors.
.macro DIFF_CHROMA_EQ4_P0Q0_2 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7
uaddl2 \arg4\().8h, \arg0\().16b, \arg3\().16b
// arg4 = 2*arg0 + 2*arg3, shared by both results
shl \arg4\().8h, \arg4\().8h, #1
usubl2 \arg5\().8h, \arg1\().16b, \arg3\().16b
add \arg5\().8h, \arg5\().8h, \arg4\().8h
rshrn2 \arg6\().16b, \arg5\().8h, #2
usubl2 \arg5\().8h, \arg2\().16b, \arg0\().16b
add \arg5\().8h, \arg5\().8h, \arg4\().8h
rshrn2 \arg7\().16b, \arg5\().8h, #2
.endm
// DIFF_LUMA_EQ4_MASK arg0, arg1, arg2, arg3
// Bitwise select: arg3 = (arg2 & arg0) | (~arg2 & arg1).
// The mask arg2 is copied first so that it is preserved.
.macro DIFF_LUMA_EQ4_MASK arg0, arg1, arg2, arg3
mov \arg3\().16b, \arg2\().16b
bsl \arg3\().16b, \arg0\().16b, \arg1\().16b
.endm
// LOAD_LUMA_DATA_3 arg0 .. arg5, arg6
// Loads one picture row into byte lane arg6: three consecutive bytes at x2 go
// to arg0, arg1, arg2 and three consecutive bytes at x0 go to arg3, arg4, arg5.
// x2 and x0 are used implicitly and both advance by x1 afterwards.
.macro LOAD_LUMA_DATA_3 arg0, arg1, arg2, arg3, arg4, arg5, arg6
ld3 {\arg0\().b, \arg1\().b, \arg2\().b} [\arg6], [x2], x1
ld3 {\arg3\().b, \arg4\().b, \arg5\().b} [\arg6], [x0], x1
.endm
// LOAD_LUMA_DATA_4 arg0 .. arg7, arg8
// Loads one picture row into byte lane arg8: four consecutive bytes at x3 go
// to arg0..arg3 and four consecutive bytes at x0 go to arg4..arg7.
// x3 and x0 are used implicitly and both advance by x1 afterwards.
.macro LOAD_LUMA_DATA_4 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8
ld4 {\arg0\().b, \arg1\().b, \arg2\().b, \arg3\().b} [\arg8], [x3], x1
ld4 {\arg4\().b, \arg5\().b, \arg6\().b, \arg7\().b} [\arg8], [x0], x1
.endm
// STORE_LUMA_DATA_4 arg0, arg1, arg2, arg3, arg4, arg5
// Writes two picture rows of four bytes each, taken from vectors arg0..arg3:
// byte lane arg4 is stored at x0 and byte lane arg5 at x2.
// x0 and x2 are used implicitly and both advance by x1 afterwards.
.macro STORE_LUMA_DATA_4 arg0, arg1, arg2, arg3, arg4, arg5
st4 {\arg0\().b, \arg1\().b, \arg2\().b, \arg3\().b} [\arg4], [x0], x1
st4 {\arg0\().b, \arg1\().b, \arg2\().b, \arg3\().b} [\arg5], [x2], x1
.endm
// STORE_LUMA_DATA_3 arg0 .. arg5, arg6
// Writes one picture row from byte lane arg6: three bytes from arg0, arg1,
// arg2 are stored at x3 and three bytes from arg3, arg4, arg5 at x0.
// x3 and x0 are used implicitly and both advance by x1 afterwards.
.macro STORE_LUMA_DATA_3 arg0, arg1, arg2, arg3, arg4, arg5, arg6
st3 {\arg0\().b, \arg1\().b, \arg2\().b} [\arg6], [x3], x1
st3 {\arg3\().b, \arg4\().b, \arg5\().b} [\arg6], [x0], x1
.endm
// LOAD_CHROMA_DATA_4 arg0, arg1, arg2, arg3, arg4, arg5
// Loads four consecutive bytes from the address in arg4 into byte lane arg5 of
// vectors arg0..arg3, then advances arg4 by x2 (used implicitly as the stride).
.macro LOAD_CHROMA_DATA_4 arg0, arg1, arg2, arg3, arg4, arg5
ld4 {\arg0\().b, \arg1\().b, \arg2\().b, \arg3\().b} [\arg5], [\arg4], x2
.endm
// STORE_CHROMA_DATA_2 arg0, arg1, arg2, arg3
// Stores byte lane arg3 of vectors arg0 and arg1 as two consecutive bytes at
// the address in arg2, then advances arg2 by x2 (used implicitly as the stride).
.macro STORE_CHROMA_DATA_2 arg0, arg1, arg2, arg3
st2 {\arg0\().b, \arg1\().b} [\arg3], [\arg2], x2
.endm
// ZERO_JUMP_END arg0, arg1, arg2, arg3
// Branches to label arg3 when all 128 bits of vector arg0 are zero.
// arg1 and arg2 are 64-bit general registers used as scratch and overwritten.
.macro ZERO_JUMP_END arg0, arg1, arg2, arg3
mov \arg1, \arg0\().d[0]
mov \arg2, \arg0\().d[1]
orr \arg1, \arg1, \arg2
cbz \arg1, \arg3
.endm
// BS_NZC_CHECK arg0, arg1, arg2, arg3, arg4
//   arg0: general register, address of 16 bytes loaded into v0
//   arg1: general register with flags, bit 1 enables the TOP load and bit 0 the LEFT load
//   arg2: general register, the TOP word is read from arg0 - 24 * arg2 + 12
//   arg3: vector receiving, per byte, v0[i] + v0[i - 4] (the first four bytes
//         add the TOP word instead)
//   arg4: same sum computed on the 4x4 byte transpose of v0 (the first four
//         bytes add the LEFT bytes instead)
// Clobbers x6, x7, the condition flags, v0, v1 and v2.
// When a flag bit is clear the matching part of v1 keeps its previous content.
// The two jump labels are fixed names, so the macro can be expanded only once
// per assembled file.
// NOTE(review): the 16 bytes are presumably the non-zero-count values of the
// sixteen 4x4 blocks of a macroblock - confirm against the C caller.
.macro BS_NZC_CHECK arg0, arg1, arg2, arg3, arg4
ld1 {v0.16b}, [\arg0]
//Arrange the input data --- TOP
ands x6, \arg1, #2
cbz x6, bs_nzc_check_jump0
sub x6, \arg0, \arg2, lsl #4
sub x6, x6, \arg2, lsl #3
add x6, x6, #12
ld1 {v1.s} [3], [x6]
bs_nzc_check_jump0:
// v1 = last four bytes of v1 followed by the first twelve bytes of v0
ext v1.16b, v1.16b, v0.16b, #12
add \arg3\().16b, v0.16b, v1.16b
// Arrange the input data --- LEFT
ands x6, \arg1, #1
cbz x6, bs_nzc_check_jump1
// gather four single bytes located at arg0-21, arg0-17, arg0-13 and arg0-9
sub x6, \arg0, #21
add x7, x6, #4
ld1 {v1.b} [12], [x6]
add x6, x7, #4
ld1 {v1.b} [13], [x7]
add x7, x6, #4
ld1 {v1.b} [14], [x6]
ld1 {v1.b} [15], [x7]
bs_nzc_check_jump1:
// two interleave passes transpose v0 as a 4x4 matrix of bytes
ins v2.d[0], v0.d[1]
zip1 v0.16b, v0.16b, v2.16b
ins v2.d[0], v0.d[1]
zip1 v0.16b, v0.16b, v2.16b
ext v1.16b, v1.16b, v0.16b, #12
add \arg4\().16b, v0.16b, v1.16b
.endm
// BS_COMPARE_MV arg0, arg1, arg2, arg3, arg4, arg5
//   in : arg0 (overwritten), arg1..arg4 (read only), eight signed 16-bit lanes each
//   out: arg5, sixteen bytes
// The four neighbouring pairs (arg0,arg1) (arg1,arg2) (arg2,arg3) (arg3,arg4)
// are compared lane by lane. A lane is flagged when the absolute difference
// is >= 4. Two adjacent lanes are folded into one byte that is 0xFF when at
// least one of them was flagged, else 0x00. Each comparison contributes four
// bytes to arg5, in order.
// Clobbers w6, v20-v23 and arg0 (left with the constant 4 in every lane).
.macro BS_COMPARE_MV arg0, arg1, arg2, arg3, arg4, arg5
//in: \arg0,\arg1(const),\arg2(const),\arg3(const),\arg4(const); out:\arg5
mov w6, #4
sabd v20.8h, \arg0\().8h, \arg1\().8h
sabd v21.8h, \arg1\().8h, \arg2\().8h
// arg0 has been consumed by the first sabd, reuse it for the threshold
dup \arg0\().8h, w6
sabd v22.8h, \arg2\().8h, \arg3\().8h
sabd v23.8h, \arg3\().8h, \arg4\().8h
cmge v20.8h, v20.8h, \arg0\().8h
cmge v21.8h, v21.8h, \arg0\().8h
cmge v22.8h, v22.8h, \arg0\().8h
cmge v23.8h, v23.8h, \arg0\().8h
// add neighbouring lanes so every result lane covers two input lanes
addp v20.8h, v20.8h, v21.8h
addp v21.8h, v22.8h, v23.8h
// doubling and keeping the high byte maps 0 to 0x00 and any flagged sum to 0xFF
addhn \arg5\().8b, v20.8h, v20.8h
addhn2 \arg5\().16b, v21.8h, v21.8h
.endm
// BS_MV_CHECK arg0, arg1, arg2, arg3, arg4, arg5, arg6
//   arg0: general register, address of 64 bytes loaded into v0-v3 (value preserved)
//   arg1: general register with flags, bit 1 enables the TOP load and bit 0 the LEFT load
//   arg2: general register, the TOP data is read from arg0 - (arg2 << 6) + 48
//   arg3: vector receiving the BS_COMPARE_MV result for the TOP direction
//   arg4: vector receiving the BS_COMPARE_MV result for the LEFT direction
//   arg5, arg6: scratch vectors used while transposing
// Clobbers x6, x7, the condition flags, v0-v4 and everything BS_COMPARE_MV clobbers.
// The two jump labels are fixed names, so the macro can be expanded only once
// per assembled file.
.macro BS_MV_CHECK arg0, arg1, arg2, arg3, arg4, arg5, arg6
ldp q0, q1, [\arg0], #32
ldp q2, q3, [\arg0]
// undo the post-increment so arg0 points at the start of the data again
sub \arg0, \arg0, #32
// Arrange the input data --- TOP
ands x6, \arg1, #2
// without the TOP flag v4 keeps whatever value it had on entry
cbz x6, bs_mv_check_jump0
sub x6, \arg0, \arg2, lsl #6
add x6, x6, #48
ld1 {v4.16b}, [x6]
bs_mv_check_jump0:
BS_COMPARE_MV v4, v0, v1, v2, v3, \arg3
// Arrange the input data --- LEFT
ands x6, \arg1, #1
// without the LEFT flag v4 still holds the threshold constant written by BS_COMPARE_MV
cbz x6, bs_mv_check_jump1
// gather four 32-bit words located at arg0-52, arg0-36, arg0-20 and arg0-4
sub x6, \arg0, #52
add x7, x6, #16
ld1 {v4.s} [0], [x6]
add x6, x7, #16
ld1 {v4.s} [1], [x7]
add x7, x6, #16
ld1 {v4.s} [2], [x6]
ld1 {v4.s} [3], [x7]
bs_mv_check_jump1:
// transpose v0-v3 as a 4x4 matrix of 32-bit elements so columns become rows
zip1 \arg5\().4s, v0.4s, v2.4s
zip2 \arg6\().4s, v0.4s, v2.4s
zip1 v0.4s, v1.4s, v3.4s
zip2 v2.4s, v1.4s, v3.4s
zip2 v1.4s, \arg5\().4s, v0.4s
zip1 v0.4s, \arg5\().4s, v0.4s
zip2 v3.4s, \arg6\().4s, v2.4s
zip1 v2.4s, \arg6\().4s, v2.4s
BS_COMPARE_MV v4, v0, v1, v2, v3, \arg4
.endm
// WelsNonZeroCount_AArch64_neon
// Clamps 24 consecutive unsigned bytes at x0 in place: every byte becomes
// min(byte, 1), so any non-zero value is replaced by 1.
// Uses w1 and v0-v3 as scratch.
WELS_ASM_AARCH64_FUNC_BEGIN WelsNonZeroCount_AArch64_neon
mov w1, #1
dup v3.8b, w1
ld1 {v0.8b, v1.8b, v2.8b}, [x0]
umin v0.8b, v0.8b, v3.8b
umin v1.8b, v1.8b, v3.8b
umin v2.8b, v2.8b, v3.8b
st1 {v0.8b, v1.8b, v2.8b}, [x0]
WELS_ASM_AARCH64_FUNC_END
// DeblockLumaLt4V_AArch64_neon
// Normal-strength luma deblocking across a horizontal edge, 16 pixels wide.
//   x0 = pPix, address of the first pixel of row q0 (the row just below the edge)
//   w1 = iStride, w2 = iAlpha, w3 = iBeta
//   x4 = tc, four signed bytes, each applied to four neighbouring columns
// Rows p2, p1, p0 (above) and q0, q1, q2 (below) are read. Rows p1, p0, q0 and
// q1 are rewritten unless the filter mask is zero for all sixteen columns.
// Scratch: x2, x3, x4, v0-v7, v16-v22.
WELS_ASM_AARCH64_FUNC_BEGIN DeblockLumaLt4V_AArch64_neon //uint8_t* pPix, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* tc
dup v16.16b, w2 //alpha
dup v17.16b, w3 //beta
SIGN_EXTENSION x1,w1
// x2 = pPix - 3 * stride, the address of row p2
add x2, x1, x1, lsl #1
sub x2, x0, x2
ld1 {v0.16b}, [x2], x1
ld1 {v1.16b}, [x2], x1
ld1 {v2.16b}, [x2]
ld1 {v3.16b}, [x0], x1
ld1 {v4.16b}, [x0], x1
ld1 {v5.16b}, [x0]
// x2 pointed at row p0, move it back to row p1 where the first store goes
sub x2, x2, x1
// broadcast each of the four tc bytes, then pack them as four groups of four
ld4r {v18.8b, v19.8b, v20.8b, v21.8b}, [x4]
trn1 v18.2s, v18.2s, v19.2s
trn1 v20.2s, v20.2s, v21.2s
trn1 v6.2d, v18.2d, v20.2d // iTc0: 0000, 1111, 2222, 3333
cmge v7.16b, v6.16b, #0 // iTc0 Flag
MASK_MATRIX v1, v2, v3, v4, v16, v17, v18
and v7.16b, v7.16b, v18.16b // need filter flag
// nothing to filter in any column: leave without storing
ZERO_JUMP_END v7, x3, x4, DeblockLumaLt4V_AArch64_neon_end
eor v18.16b, v18.16b, v18.16b
sub v18.16b, v18.16b, v6.16b // -iTc0: 0000, 1111, 2222, 3333
// v19 = new p1, v20 = 1 where |p2 - p0| < beta
DIFF_LUMA_LT4_P1_Q1 v0, v1, v2, v3, v17, v18, v6, v7, v19, v20
st1 {v19.16b}, [x2], x1
// v21 = new q1, v22 = 1 where |q2 - q0| < beta
DIFF_LUMA_LT4_P1_Q1 v5, v4, v3, v2, v17, v18, v6, v7, v21, v22
abs v20.16b, v20.16b
abs v22.16b, v22.16b
// tc = tc0 + both side flags, v18 = -tc
add v6.16b, v6.16b, v20.16b
add v6.16b, v6.16b, v22.16b
eor v18.16b, v18.16b, v18.16b
sub v18.16b, v18.16b, v6.16b
DIFF_LUMA_LT4_P0_Q0_1 v1, v2, v3, v4, v19, v20, v22
DIFF_LUMA_LT4_P0_Q0_2 v1, v2, v3, v4, v19, v20, v22
// clamp the delta to [-tc, tc] and zero it where filtering is disabled
smax v19.16b, v19.16b, v18.16b
smin v19.16b, v19.16b, v6.16b
and v19.16b, v19.16b, v7.16b
// v20 = positive part, v19 = magnitude of the negative part
EXTRACT_DELTA_INTO_TWO_PART v19, v20
// p0 + delta and q0 - delta with unsigned saturation
uqadd v2.16b, v2.16b, v20.16b
uqsub v2.16b, v2.16b, v19.16b
st1 {v2.16b}, [x2], x1
uqsub v3.16b, v3.16b, v20.16b
uqadd v3.16b, v3.16b, v19.16b
st1 {v3.16b}, [x2], x1
st1 {v21.16b}, [x2]
DeblockLumaLt4V_AArch64_neon_end:
WELS_ASM_AARCH64_FUNC_END
// DeblockLumaEq4V_AArch64_neon
// Strong luma deblocking across a horizontal edge, 16 pixels wide.
// Register use as read from the code:
//   x0 = address of the first pixel of row q0 (the row just below the edge)
//   w1 = stride, w2 = alpha, w3 = beta
// Rows p3..p0 (above) and q0..q3 (below) are read. Rows p2, p1, p0, q0, q1 and
// q2 are rewritten unless the edge mask is zero for all sixteen columns.
// Scratch: x3, x4, x5, w2, v0-v7, v16-v25.
WELS_ASM_AARCH64_FUNC_BEGIN DeblockLumaEq4V_AArch64_neon
dup v16.16b, w2 //alpha
dup v17.16b, w3 //beta
SIGN_EXTENSION x1,w1
// x3 = address of row p3, four rows above x0
sub x3, x0, x1, lsl #2
ld1 {v0.16b}, [x3], x1
ld1 {v4.16b}, [x0], x1
ld1 {v1.16b}, [x3], x1
ld1 {v5.16b}, [x0], x1
ld1 {v2.16b}, [x3], x1
ld1 {v6.16b}, [x0], x1
ld1 {v3.16b}, [x3]
ld1 {v7.16b}, [x0]
// x3 pointed at row p0, move it back to row p2 where the first store goes
sub x3, x3, x1, lsl #1
MASK_MATRIX v2, v3, v4, v5, v16, v17, v18
// Every store below is selected by a mask that is ANDed with v18, so when v18
// is all zero only unchanged pixels would be written back: skip the work.
ZERO_JUMP_END v18, x4, x5, DeblockLumaEq4V_AArch64_neon_end
lsr w2, w2, #2
add w2, w2, #2
dup v16.16b, w2 //((alpha >> 2) + 2)
uabd v19.16b, v3.16b, v4.16b
cmhi v20.16b, v16.16b, v19.16b //iDetaP0Q0 < ((iAlpha >> 2) + 2)
uabd v21.16b, v1.16b, v3.16b
cmhi v21.16b, v17.16b, v21.16b //bDetaP2P0
and v21.16b, v21.16b, v20.16b //(iDetaP0Q0 < ((iAlpha >> 2) + 2))&&bDetaP2P0
uabd v22.16b, v6.16b, v4.16b
cmhi v22.16b, v17.16b, v22.16b //bDetaQ2Q0
and v22.16b, v22.16b, v20.16b //(iDetaP0Q0 < ((iAlpha >> 2) + 2))&&bDetaQ2Q0
and v20.16b, v20.16b, v18.16b //(iDetaP0Q0 < iAlpha) && bDetaP1P0 && bDetaQ1Q0&&(iDetaP0Q0 < ((iAlpha >> 2) + 2))
// P side. The lower-half macro writes its results in 8-byte form, so copies of
// the selector (v24) and of p3 (v25) are used for the upper half and merged
// back afterwards.
mov v23.16b, v21.16b
mov v24.16b, v21.16b
mov v25.16b, v0.16b
DIFF_LUMA_EQ4_P2P1P0_1 v0, v1, v2, v3, v4, v5, v23, v19, v17, v16
DIFF_LUMA_EQ4_P2P1P0_2 v25, v1, v2, v3, v4, v5, v24, v19, v17, v16
ins v0.d[1], v25.d[1]
ins v23.d[1], v24.d[1]
and v21.16b, v20.16b, v21.16b
// select between filtered and original pixel per column and store p2, p1, p0
DIFF_LUMA_EQ4_MASK v19, v1, v21, v17
st1 {v17.16b}, [x3], x1
DIFF_LUMA_EQ4_MASK v0, v2, v21, v17
st1 {v17.16b}, [x3], x1
DIFF_LUMA_EQ4_MASK v23, v3, v18, v17
st1 {v17.16b}, [x3], x1
// Q side: same computation with the pixel order mirrored
mov v23.16b, v22.16b
mov v24.16b, v22.16b
mov v25.16b, v7.16b
DIFF_LUMA_EQ4_P2P1P0_1 v7, v6, v5, v4, v3, v2, v23, v19, v17, v16
DIFF_LUMA_EQ4_P2P1P0_2 v25, v6, v5, v4, v3, v2, v24, v19, v17, v16
ins v7.d[1], v25.d[1]
ins v23.d[1], v24.d[1]
and v22.16b, v20.16b, v22.16b
// store q0, q1, q2
DIFF_LUMA_EQ4_MASK v23, v4, v18, v17
st1 {v17.16b}, [x3], x1
DIFF_LUMA_EQ4_MASK v7, v5, v22, v17
st1 {v17.16b}, [x3], x1
DIFF_LUMA_EQ4_MASK v19, v6, v22, v17
st1 {v17.16b}, [x3], x1
DeblockLumaEq4V_AArch64_neon_end:
WELS_ASM_AARCH64_FUNC_END
// DeblockLumaLt4H_AArch64_neon
// Normal-strength luma deblocking across a vertical edge, 16 rows high.
//   x0 = pPix, address of pixel q0 (just right of the edge) in the first row
//   w1 = iStride, w2 = iAlpha, w3 = iBeta
//   x4 = tc, four signed bytes, each applied to four neighbouring rows
// For every row the three pixels left of the edge (p2, p1, p0) and the three
// right of it (q0, q1, q2) are gathered into byte lanes, filtered like a
// horizontal edge, and p1, p0, q0, q1 are scattered back. Nothing is stored
// when the filter mask is zero for all sixteen rows.
// Scratch: x2, x3, x4, v0-v7, v16-v22, v25-v28. x1 is doubled before the stores.
WELS_ASM_AARCH64_FUNC_BEGIN DeblockLumaLt4H_AArch64_neon //uint8_t* pPix, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* tc
dup v16.16b, w2 //alpha
dup v17.16b, w3 //beta
// x2 = address of p2 in the first row
sub x2, x0, #3
SIGN_EXTENSION x1,w1
// row n goes to byte lane n: v0..v2 = p2, p1, p0 and v3..v5 = q0, q1, q2
LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 0
LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 1
LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 2
LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 3
LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 4
LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 5
LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 6
LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 7
LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 8
LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 9
LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 10
LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 11
LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 12
LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 13
LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 14
LOAD_LUMA_DATA_3 v0, v1, v2, v3, v4, v5, 15
// the loads advanced x0 by 16 rows, rewind it to the first row
sub x0, x0, x1, lsl #4
// broadcast each of the four tc bytes, then pack them as four groups of four
ld4r {v18.8b, v19.8b, v20.8b, v21.8b}, [x4]
trn1 v18.2s, v18.2s, v19.2s
trn1 v20.2s, v20.2s, v21.2s
trn1 v6.2d, v18.2d, v20.2d // iTc0: 0000, 1111, 2222, 3333
cmge v7.16b, v6.16b, #0 // iTc0 Flag
MASK_MATRIX v1, v2, v3, v4, v16, v17, v18
and v7.16b, v7.16b, v18.16b // need filter flag
// nothing to filter in any row: leave without storing
ZERO_JUMP_END v7, x3, x4, DeblockLumaLt4H_AArch64_neon_end
eor v18.16b, v18.16b, v18.16b
sub v18.16b, v18.16b, v6.16b // -iTc0: 0000, 1111, 2222, 3333
// v19 = new p1, v20 = 1 where |p2 - p0| < beta
DIFF_LUMA_LT4_P1_Q1 v0, v1, v2, v3, v17, v18, v6, v7, v19, v20
mov v25.16b, v19.16b
// v21 = new q1, v22 = 1 where |q2 - q0| < beta
DIFF_LUMA_LT4_P1_Q1 v5, v4, v3, v2, v17, v18, v6, v7, v21, v22
abs v20.16b, v20.16b
abs v22.16b, v22.16b
// tc = tc0 + both side flags, v18 = -tc
add v6.16b, v6.16b, v20.16b
add v6.16b, v6.16b, v22.16b
eor v18.16b, v18.16b, v18.16b
sub v18.16b, v18.16b, v6.16b
DIFF_LUMA_LT4_P0_Q0_1 v1, v2, v3, v4, v19, v20, v22
DIFF_LUMA_LT4_P0_Q0_2 v1, v2, v3, v4, v19, v20, v22
// clamp the delta to [-tc, tc] and zero it where filtering is disabled
smax v19.16b, v19.16b, v18.16b
smin v19.16b, v19.16b, v6.16b
and v19.16b, v19.16b, v7.16b
// v20 = positive part, v19 = magnitude of the negative part
EXTRACT_DELTA_INTO_TWO_PART v19, v20
uqadd v2.16b, v2.16b, v20.16b
uqsub v2.16b, v2.16b, v19.16b
mov v26.16b, v2.16b
uqsub v3.16b, v3.16b, v20.16b
uqadd v3.16b, v3.16b, v19.16b
mov v27.16b, v3.16b
mov v28.16b, v21.16b
// x0 = p1 of row 0, x2 = p1 of row 1, both then step two rows at a time
sub x0, x0, #2
add x2, x0, x1
lsl x1, x1, #1
// v25..v28 = new p1, p0, q0, q1. Even lanes go to x0, odd lanes to x2
STORE_LUMA_DATA_4 v25, v26, v27, v28, 0, 1
STORE_LUMA_DATA_4 v25, v26, v27, v28, 2, 3
STORE_LUMA_DATA_4 v25, v26, v27, v28, 4, 5
STORE_LUMA_DATA_4 v25, v26, v27, v28, 6, 7
STORE_LUMA_DATA_4 v25, v26, v27, v28, 8, 9
STORE_LUMA_DATA_4 v25, v26, v27, v28, 10, 11
STORE_LUMA_DATA_4 v25, v26, v27, v28, 12, 13
STORE_LUMA_DATA_4 v25, v26, v27, v28, 14, 15
DeblockLumaLt4H_AArch64_neon_end:
WELS_ASM_AARCH64_FUNC_END
// DeblockLumaEq4H_AArch64_neon
// Strong luma deblocking across a vertical edge, 16 rows high.
// Register use as read from the code:
//   x0 = address of pixel q0 (just right of the edge) in the first row
//   w1 = stride, w2 = alpha, w3 = beta
// For every row the four pixels left of the edge (p3..p0) and the four right
// of it (q0..q3) are gathered into byte lanes. p2, p1, p0, q0, q1, q2 are
// scattered back unless the edge mask is zero for all sixteen rows.
// Scratch: x3, x4, x5, w2, v0-v7, v16-v31.
WELS_ASM_AARCH64_FUNC_BEGIN DeblockLumaEq4H_AArch64_neon
dup v16.16b, w2 //alpha
dup v17.16b, w3 //beta
// x3 = address of p3 in the first row
sub x3, x0, #4
SIGN_EXTENSION x1,w1
// row n goes to byte lane n: v0..v3 = p3, p2, p1, p0 and v4..v7 = q0, q1, q2, q3
LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 0
LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 1
LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 2
LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 3
LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 4
LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 5
LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 6
LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 7
LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 8
LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 9
LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 10
LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 11
LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 12
LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 13
LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 14
LOAD_LUMA_DATA_4 v0, v1, v2, v3, v4, v5, v6, v7, 15
// rewind x0 to the first row, x3 = address of p2 in the first row for the stores
sub x0, x0, x1, lsl #4
sub x3, x0, #3
MASK_MATRIX v2, v3, v4, v5, v16, v17, v18
// no row needs filtering: leave without storing
ZERO_JUMP_END v18, x4, x5, DeblockLumaEq4H_AArch64_neon_end
lsr w2, w2, #2
add w2, w2, #2
dup v16.16b, w2 //((alpha >> 2) + 2)
uabd v19.16b, v3.16b, v4.16b
cmhi v20.16b, v16.16b, v19.16b //iDetaP0Q0 < ((iAlpha >> 2) + 2)
uabd v21.16b, v1.16b, v3.16b
cmhi v21.16b, v17.16b, v21.16b //bDetaP2P0
and v21.16b, v21.16b, v20.16b //(iDetaP0Q0 < ((iAlpha >> 2) + 2))&&bDetaP2P0
uabd v22.16b, v6.16b, v4.16b
cmhi v22.16b, v17.16b, v22.16b //bDetaQ2Q0
and v22.16b, v22.16b, v20.16b //(iDetaP0Q0 < ((iAlpha >> 2) + 2))&&bDetaQ2Q0
and v20.16b, v20.16b, v18.16b //(iDetaP0Q0 < iAlpha) && bDetaP1P0 && bDetaQ1Q0&&(iDetaP0Q0 < ((iAlpha >> 2) + 2))
// P side. The lower-half macro writes its results in 8-byte form, so copies of
// the selector (v24) and of p3 (v25) are used for the upper half and merged
// back afterwards.
mov v23.16b, v21.16b
mov v24.16b, v21.16b
mov v25.16b, v0.16b
DIFF_LUMA_EQ4_P2P1P0_1 v0, v1, v2, v3, v4, v5, v23, v19, v17, v16
DIFF_LUMA_EQ4_P2P1P0_2 v25, v1, v2, v3, v4, v5, v24, v19, v17, v16
ins v0.d[1], v25.d[1]
ins v23.d[1], v24.d[1]
and v21.16b, v20.16b, v21.16b
// v26, v27, v28 = final p2, p1, p0 (filtered where the mask allows, else original)
DIFF_LUMA_EQ4_MASK v19, v1, v21, v17
mov v26.16b, v17.16b
DIFF_LUMA_EQ4_MASK v0, v2, v21, v17
mov v27.16b, v17.16b
DIFF_LUMA_EQ4_MASK v23, v3, v18, v17
mov v28.16b, v17.16b
// Q side: same computation with the pixel order mirrored
mov v23.16b, v22.16b
mov v24.16b, v22.16b
mov v25.16b, v7.16b
DIFF_LUMA_EQ4_P2P1P0_1 v7, v6, v5, v4, v3, v2, v23, v19, v17, v16
DIFF_LUMA_EQ4_P2P1P0_2 v25, v6, v5, v4, v3, v2, v24, v19, v17, v16
ins v7.d[1], v25.d[1]
ins v23.d[1], v24.d[1]
and v22.16b, v20.16b, v22.16b
// v29, v30, v31 = final q0, q1, q2
DIFF_LUMA_EQ4_MASK v23, v4, v18, v17
mov v29.16b, v17.16b
DIFF_LUMA_EQ4_MASK v7, v5, v22, v17
mov v30.16b, v17.16b
DIFF_LUMA_EQ4_MASK v19, v6, v22, v17
mov v31.16b, v17.16b
// byte lane n is written back to row n: three bytes at x3 and three at x0
STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 0
STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 1
STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 2
STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 3
STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 4
STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 5
STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 6
STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 7
STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 8
STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 9
STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 10
STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 11
STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 12
STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 13
STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 14
STORE_LUMA_DATA_3 v26, v27, v28, v29, v30, v31, 15
DeblockLumaEq4H_AArch64_neon_end:
WELS_ASM_AARCH64_FUNC_END
// Deblocks one horizontal chroma edge for the bS < 4 case, handling Cb and Cr in one pass.
// Register arguments: x0 = pPixCb, x1 = pPixCr, w2 = iStrideX, w3 = iAlpha, w4 = iBeta, x5 = pTc.
// Vector layout: v0..v3 hold the rows at -2*stride, -1*stride, 0 and +1*stride relative to the
// edge; the low 8 bytes of each vector come from Cb, the high 8 bytes from Cr.
// Only the two rows adjacent to the edge (v1 and v2) are written back.
// MASK_MATRIX, ZERO_JUMP_END, DIFF_LUMA_LT4_P0_Q0_* and EXTRACT_DELTA_INTO_TWO_PART are macros
// defined outside this function; notes about them below are assumptions to be confirmed.
WELS_ASM_AARCH64_FUNC_BEGIN DeblockChromaLt4V_AArch64_neon //uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStrideX, int32_t iAlpha, int32_t iBeta, int8_t* pTc
dup v16.16b, w3 //alpha
dup v17.16b, w4 //beta
sxtw x2, w2 // iStrideX is a 32-bit argument: sign-extend it before x2 is used as a 64-bit address offset
lsl x3, x2, #1
sub x6, x0, x3 //pPixCb-2*Stride
sub x7, x1, x3 //pPixCr-2*Stride
// Cb rows into the low halves; x6 is left pointing at the row at -1*stride
ld1 {v0.d} [0], [x6], x2
ld1 {v1.d} [0], [x6]
ld1 {v2.d} [0], [x0], x2
ld1 {v3.d} [0], [x0]
// Cr rows into the high halves; x7 is left pointing at the row at -1*stride
ld1 {v0.d} [1], [x7], x2
ld1 {v1.d} [1], [x7]
ld1 {v2.d} [1], [x1], x2
ld1 {v3.d} [1], [x1]
// Load the four pTc bytes, each replicated across one register, then interleave them
ld4r {v18.8b, v19.8b, v20.8b, v21.8b}, [x5]
trn1 v18.4h, v18.4h, v19.4h //0011,0011,
trn1 v20.4h, v20.4h, v21.4h //2233,2233
zip1 v6.4s, v18.4s, v20.4s //iTc0: 0011,2233,0011,2233
cmgt v7.16b, v6.16b, #0 // iTc0 Flag
MASK_MATRIX v0, v1, v2, v3, v16, v17, v18 // presumably v18 = per-pixel alpha/beta filter mask
and v7.16b, v7.16b, v18.16b // need filter flag
// presumably skips to the end label when no lane needs filtering; x4/x5 are handed over as scratch
ZERO_JUMP_END v7, x4, x5, DeblockChromaLt4V_AArch64_neon_end
eor v18.16b, v18.16b, v18.16b
sub v18.16b, v18.16b, v6.16b //-iTc0: 0011,2233,0011,2233
DIFF_LUMA_LT4_P0_Q0_1 v0, v1, v2, v3, v19, v20, v22
DIFF_LUMA_LT4_P0_Q0_2 v0, v1, v2, v3, v19, v20, v22
// clamp the delta in v19 to [-iTc0, iTc0] and zero it in lanes that must not be filtered
smax v19.16b, v19.16b, v18.16b
smin v19.16b, v19.16b, v6.16b
and v19.16b, v19.16b, v7.16b
EXTRACT_DELTA_INTO_TWO_PART v19, v20
// row at -1*stride: saturating add of v20, saturating subtract of v19
uqadd v1.16b, v1.16b, v20.16b
uqsub v1.16b, v1.16b, v19.16b
st1 {v1.d} [0], [x6], x2
st1 {v1.d} [1], [x7], x2
// row at 0: the opposite adjustment
uqsub v2.16b, v2.16b, v20.16b
uqadd v2.16b, v2.16b, v19.16b
st1 {v2.d} [0], [x6]
st1 {v2.d} [1], [x7]
DeblockChromaLt4V_AArch64_neon_end:
WELS_ASM_AARCH64_FUNC_END
// Deblocks one vertical chroma edge for the bS < 4 case, handling Cb and Cr in one pass.
// Register arguments: x0 = pPixCb, x1 = pPixCr, w2 = iStrideX, w3 = iAlpha, w4 = iBeta, x5 = pTc.
// Lanes 0-7 of v0..v3 are filled through x6 (Cb, starting 2 bytes left of the edge) and lanes
// 8-15 through x7 (Cr). Results are stored through x0/x1 after they are moved 1 byte left.
// LOAD_CHROMA_DATA_4 / STORE_CHROMA_DATA_2 and the filter macros are defined outside this
// function. They receive no stride operand, so presumably they step the pointer by x2 - confirm.
WELS_ASM_AARCH64_FUNC_BEGIN DeblockChromaLt4H_AArch64_neon //uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStrideX, int32_t iAlpha, int32_t iBeta, int8_t* pTc
dup v16.16b, w3 //alpha
dup v17.16b, w4 //beta
sxtw x2, w2 // iStrideX is a 32-bit argument: make x2 a well-defined 64-bit value before any 64-bit use
sub x6, x0, #2 //pPixCb-2
sub x7, x1, #2 //pPixCr-2
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 0
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 1
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 2
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 3
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 4
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 5
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 6
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 7
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 8
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 9
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 10
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 11
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 12
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 13
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 14
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 15
// store pointers: one byte left of the edge in each plane
sub x0, x0, #1
sub x1, x1, #1
// Load the four pTc bytes, each replicated across one register, then interleave them
ld4r {v18.8b, v19.8b, v20.8b, v21.8b}, [x5]
trn1 v18.4h, v18.4h, v19.4h //0011,0011,
trn1 v20.4h, v20.4h, v21.4h //2233,2233
zip1 v6.4s, v18.4s, v20.4s //iTc0: 0011,2233,0011,2233
cmgt v7.16b, v6.16b, #0 // iTc0 Flag
MASK_MATRIX v0, v1, v2, v3, v16, v17, v18 // presumably v18 = per-pixel alpha/beta filter mask
and v7.16b, v7.16b, v18.16b // need filter flag
// presumably skips to the end label when no lane needs filtering; x4/x5 are handed over as scratch
ZERO_JUMP_END v7, x4, x5, DeblockChromaLt4H_AArch64_neon_end
eor v18.16b, v18.16b, v18.16b
sub v18.16b, v18.16b, v6.16b //-iTc0: 0011,2233,0011,2233
DIFF_LUMA_LT4_P0_Q0_1 v0, v1, v2, v3, v19, v20, v22
DIFF_LUMA_LT4_P0_Q0_2 v0, v1, v2, v3, v19, v20, v22
// clamp the delta in v19 to [-iTc0, iTc0] and zero it in lanes that must not be filtered
smax v19.16b, v19.16b, v18.16b
smin v19.16b, v19.16b, v6.16b
and v19.16b, v19.16b, v7.16b
EXTRACT_DELTA_INTO_TWO_PART v19, v20
// v1: saturating add of v20 then subtract of v19; v2: the opposite adjustment
uqadd v1.16b, v1.16b, v20.16b
uqsub v1.16b, v1.16b, v19.16b
uqsub v2.16b, v2.16b, v20.16b
uqadd v2.16b, v2.16b, v19.16b
STORE_CHROMA_DATA_2 v1, v2, x0, 0
STORE_CHROMA_DATA_2 v1, v2, x0, 1
STORE_CHROMA_DATA_2 v1, v2, x0, 2
STORE_CHROMA_DATA_2 v1, v2, x0, 3
STORE_CHROMA_DATA_2 v1, v2, x0, 4
STORE_CHROMA_DATA_2 v1, v2, x0, 5
STORE_CHROMA_DATA_2 v1, v2, x0, 6
STORE_CHROMA_DATA_2 v1, v2, x0, 7
STORE_CHROMA_DATA_2 v1, v2, x1, 8
STORE_CHROMA_DATA_2 v1, v2, x1, 9
STORE_CHROMA_DATA_2 v1, v2, x1, 10
STORE_CHROMA_DATA_2 v1, v2, x1, 11
STORE_CHROMA_DATA_2 v1, v2, x1, 12
STORE_CHROMA_DATA_2 v1, v2, x1, 13
STORE_CHROMA_DATA_2 v1, v2, x1, 14
STORE_CHROMA_DATA_2 v1, v2, x1, 15
DeblockChromaLt4H_AArch64_neon_end:
WELS_ASM_AARCH64_FUNC_END
// Deblocks one horizontal chroma edge for the bS == 4 case, handling Cb and Cr in one pass.
// Register arguments: x0 = pPixCb, x1 = pPixCr, w2 = iStrideX, w3 = iAlpha, w4 = iBeta.
// Vector layout: v0..v3 hold the rows at -2*stride, -1*stride, 0 and +1*stride relative to the
// edge; the low 8 bytes of each vector come from Cb, the high 8 bytes from Cr.
// MASK_MATRIX, ZERO_JUMP_END and DIFF_CHROMA_EQ4_P0Q0_* are macros defined outside this
// function; notes about them below are assumptions to be confirmed.
WELS_ASM_AARCH64_FUNC_BEGIN DeblockChromaEq4V_AArch64_neon //uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStrideX, int32_t iAlpha, int32_t iBeta
dup v16.16b, w3 //alpha
dup v17.16b, w4 //beta
sxtw x2, w2 // iStrideX is a 32-bit argument: sign-extend it before x2 is used as a 64-bit address offset
lsl x3, x2, #1
sub x6, x0, x3 //pPixCb-2*Stride
sub x7, x1, x3 //pPixCr-2*Stride
// Cb rows into the low halves; x6 is left pointing at the row at -1*stride
ld1 {v0.d} [0], [x6], x2
ld1 {v1.d} [0], [x6]
ld1 {v2.d} [0], [x0], x2
ld1 {v3.d} [0], [x0]
// Cr rows into the high halves; x7 is left pointing at the row at -1*stride
ld1 {v0.d} [1], [x7], x2
ld1 {v1.d} [1], [x7]
ld1 {v2.d} [1], [x1], x2
ld1 {v3.d} [1], [x1]
MASK_MATRIX v0, v1, v2, v3, v16, v17, v7 // presumably v7 = per-pixel alpha/beta filter mask
// presumably skips to the end label when no lane needs filtering; x3/x4 are handed over as scratch
ZERO_JUMP_END v7, x3, x4, DeblockChromaEq4V_AArch64_neon_end
DIFF_CHROMA_EQ4_P0Q0_1 v0, v1, v2, v3, v18, v19, v20, v21
DIFF_CHROMA_EQ4_P0Q0_2 v0, v1, v2, v3, v18, v19, v20, v21
// bsl keeps the mask in the destination: take v20/v21 where the mask is set, the original row elsewhere
mov v6.16b, v7.16b
bsl v6.16b, v20.16b, v1.16b
bsl v7.16b, v21.16b, v2.16b
st1 {v6.d} [0], [x6], x2
st1 {v6.d} [1], [x7], x2
st1 {v7.d} [0], [x6]
st1 {v7.d} [1], [x7]
DeblockChromaEq4V_AArch64_neon_end:
WELS_ASM_AARCH64_FUNC_END
// Deblocks one vertical chroma edge for the bS == 4 case, handling Cb and Cr in one pass.
// Register arguments: x0 = pPixCb, x1 = pPixCr, w2 = iStrideX, w3 = iAlpha, w4 = iBeta.
// Lanes 0-7 of v0..v3 are filled through x6 (Cb, starting 2 bytes left of the edge) and lanes
// 8-15 through x7 (Cr). Results are stored through x0/x1 after they are moved 1 byte left.
// LOAD_CHROMA_DATA_4 / STORE_CHROMA_DATA_2 and the filter macros are defined outside this
// function. They receive no stride operand, so presumably they step the pointer by x2 - confirm.
WELS_ASM_AARCH64_FUNC_BEGIN DeblockChromaEq4H_AArch64_neon //uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStrideX, int32_t iAlpha, int32_t iBeta
dup v16.16b, w3 //alpha
dup v17.16b, w4 //beta
sxtw x2, w2 // iStrideX is a 32-bit argument: make x2 a well-defined 64-bit value before any 64-bit use
sub x6, x0, #2 //pPixCb-2
sub x7, x1, #2 //pPixCr-2
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 0
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 1
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 2
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 3
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 4
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 5
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 6
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x6, 7
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 8
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 9
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 10
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 11
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 12
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 13
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 14
LOAD_CHROMA_DATA_4 v0, v1, v2, v3, x7, 15
// store pointers: one byte left of the edge in each plane
sub x0, x0, #1
sub x1, x1, #1
MASK_MATRIX v0, v1, v2, v3, v16, v17, v7 // presumably v7 = per-pixel alpha/beta filter mask
// presumably skips to the end label when no lane needs filtering; x3/x4 are handed over as scratch
ZERO_JUMP_END v7, x3, x4, DeblockChromaEq4H_AArch64_neon_end
DIFF_CHROMA_EQ4_P0Q0_1 v0, v1, v2, v3, v18, v19, v20, v21
DIFF_CHROMA_EQ4_P0Q0_2 v0, v1, v2, v3, v18, v19, v20, v21
// bsl keeps the mask in the destination: take v20/v21 where the mask is set, the original column elsewhere
mov v6.16b, v7.16b
bsl v6.16b, v20.16b, v1.16b
bsl v7.16b, v21.16b, v2.16b
STORE_CHROMA_DATA_2 v6, v7, x0, 0
STORE_CHROMA_DATA_2 v6, v7, x0, 1
STORE_CHROMA_DATA_2 v6, v7, x0, 2
STORE_CHROMA_DATA_2 v6, v7, x0, 3
STORE_CHROMA_DATA_2 v6, v7, x0, 4
STORE_CHROMA_DATA_2 v6, v7, x0, 5
STORE_CHROMA_DATA_2 v6, v7, x0, 6
STORE_CHROMA_DATA_2 v6, v7, x0, 7
STORE_CHROMA_DATA_2 v6, v7, x1, 8
STORE_CHROMA_DATA_2 v6, v7, x1, 9
STORE_CHROMA_DATA_2 v6, v7, x1, 10
STORE_CHROMA_DATA_2 v6, v7, x1, 11
STORE_CHROMA_DATA_2 v6, v7, x1, 12
STORE_CHROMA_DATA_2 v6, v7, x1, 13
STORE_CHROMA_DATA_2 v6, v7, x1, 14
STORE_CHROMA_DATA_2 v6, v7, x1, 15
DeblockChromaEq4H_AArch64_neon_end:
WELS_ASM_AARCH64_FUNC_END
// Computes boundary-strength values for the encoder and stores 32 bytes at [x4]:
// first v0 (the "left" direction result), then v1 (the "top" direction result).
// Each output byte is the unsigned maximum of the nzc-derived value (0 or 2) and the
// mv-derived value masked with 1.
// BS_NZC_CHECK reads through x0 and BS_MV_CHECK through x1; both also take x2 and x3.
// NOTE(review): the C prototype is not visible in this file section - confirm the meaning of x0..x4.
WELS_ASM_AARCH64_FUNC_BEGIN DeblockingBSCalcEnc_AArch64_neon
// Checking the nzc status
BS_NZC_CHECK x0, x2, x3, v16, v17 //v16,v17 save the nzc status
// For checking bS[I] = 2
movi v0.16b, #0
cmgt v16.16b, v16.16b, v0.16b
cmgt v17.16b, v17.16b, v0.16b
movi v0.16b, #2
and v16.16b, v16.16b, v0.16b //v16 save the nzc check result all the time --- for dir is top
and v17.16b, v17.16b, v0.16b //v17 save the nzc check result all the time --- for dir is left
// Checking the mv status
BS_MV_CHECK x1, x2, x3, v18, v19, v5 , v6 //v18, v19 save the mv status
// For checking bS[I] = 1
movi v0.16b, #1
and v18.16b, v18.16b, v0.16b //v18 save the mv check result all the time --- for dir is top
and v19.16b, v19.16b, v0.16b //v19 save the mv check result all the time --- for dir is left
// Check bS[I] is '1' or '2'
umax v1.16b, v18.16b, v16.16b
umax v0.16b, v19.16b, v17.16b
st1 {v0.16b, v1.16b}, [x4]
WELS_ASM_AARCH64_FUNC_END
#endif

View File

@ -0,0 +1,150 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifdef HAVE_NEON_AARCH64
#include "arm_arch64_common_macro.S"
//void ExpandPictureLuma_AArch64_neon (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW, const int32_t kiPicH);
// Pads a luma plane by 32 pixels on every side by replicating its border pixels.
// x0 = pDst (top-left pixel of the picture), x1 = kiStride, x2 = kiPicW, x3 = kiPicH.
// Pass 1 writes 32 bytes to the left and right of every row; pass 2 copies the (already
// widened) first and last rows 32 times upwards and downwards.
// Both loops exit only when their counter reaches exactly zero, so kiPicH must be > 0 and
// kiPicW + 64 must be a multiple of 16.
WELS_ASM_AARCH64_FUNC_BEGIN ExpandPictureLuma_AArch64_neon
SIGN_EXTENSION x1,w1
SIGN_EXTENSION x2,w2
SIGN_EXTENSION x3,w3
mov x7, x0
mov x8, x3
add x4, x7, x2
sub x4, x4, #1
mov x10, #16
//For the left and right expand
// x7 walks the first pixel of each row, x4 the last pixel; x8 counts the remaining rows
_expand_picture_luma_loop2:
sub x5, x7, #32
add x6, x4, #1
ld1r {v0.16b}, [x7], x1
ld1r {v2.16b}, [x4], x1
mov v1.16b, v0.16b
mov v3.16b, v2.16b
// st2 of two identical registers writes 32 bytes of the replicated border pixel
st2 {v0.16b, v1.16b}, [x5]
st2 {v2.16b, v3.16b}, [x6]
sub x8, x8, #1
cbnz x8, _expand_picture_luma_loop2
//for the top and bottom expand
// x2 = padded row width, x0 = start of the padded first row, x4 = start of the padded last row
add x2, x2, #64
sub x0, x0, #32
madd x4, x1, x3, x0
sub x4, x4, x1
// one 16-byte column per iteration: x5 = 32 rows above the first row, x6 = row below the last row
_expand_picture_luma_loop0:
mov x5, #32
msub x5, x5, x1, x0
add x6, x4, x1
ld1 {v0.16b}, [x0], x10
ld1 {v1.16b}, [x4], x10
mov x8, #32
_expand_picture_luma_loop1:
st1 {v0.16b}, [x5], x1
st1 {v1.16b}, [x6], x1
sub x8, x8, #1
cbnz x8, _expand_picture_luma_loop1
sub x2, x2, #16
cbnz x2, _expand_picture_luma_loop0
WELS_ASM_AARCH64_FUNC_END
//void ExpandPictureChroma_AArch64_neon (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW,
// const int32_t kiPicH);
// Pads a chroma plane by 16 pixels on every side by replicating its border pixels.
// x0 = pDst (top-left pixel of the picture), x1 = kiStride, x2 = kiPicW, x3 = kiPicH.
// Pass 1 writes 16 bytes to the left and right of every row; pass 2 copies the widened first
// and last rows 16 times upwards and downwards in 16-byte columns, plus one trailing 8-byte
// column when (kiPicW + 32) % 16 == 8. Any other remainder is left unhandled.
WELS_ASM_AARCH64_FUNC_BEGIN ExpandPictureChroma_AArch64_neon
//Save the dst
SIGN_EXTENSION x1,w1
SIGN_EXTENSION x2,w2
SIGN_EXTENSION x3,w3
mov x7, x0
mov x8, x3
mov x10, #16
add x4, x7, x2
sub x4, x4, #1
//For the left and right expand
// x7 walks the first pixel of each row, x4 the last pixel; x8 counts the remaining rows
_expand_picture_chroma_loop2:
sub x5, x7, #16
add x6, x4, #1
ld1r {v0.16b}, [x7], x1
ld1r {v1.16b}, [x4], x1
st1 {v0.16b}, [x5]
st1 {v1.16b}, [x6]
sub x8, x8, #1
cbnz x8, _expand_picture_chroma_loop2
//for the top and bottom expand
add x2, x2, #32
// x9 keeps the full padded width; x2 is rounded down to a multiple of 16 for the main loop
mov x9, x2
mov x11, #15
bic x2, x2, x11
// x0 = start of the padded first row, x4 = start of the padded last row
sub x0, x0, #16
madd x4, x1, x3, x0
sub x4, x4, x1
// one 16-byte column per iteration: x5 = 16 rows above the first row, x6 = row below the last row
_expand_picture_chroma_loop0:
mov x5, #16
msub x5, x5, x1, x0
add x6, x4, x1
ld1 {v0.16b}, [x0], x10
ld1 {v1.16b}, [x4], x10
mov x8, #16
_expand_picture_chroma_loop1:
st1 {v0.16b}, [x5], x1
st1 {v1.16b}, [x6], x1
sub x8, x8, #1
cbnz x8, _expand_picture_chroma_loop1
sub x2, x2, #16
cbnz x2, _expand_picture_chroma_loop0
// tail: continue only when exactly 8 bytes of the padded width remain
and x9, x9, #15
sub x9, x9, #8
cbnz x9, _expand_picture_chroma_end
mov x5, #16
msub x5, x5, x1, x0
add x6, x4, x1
ld1 {v0.8b}, [x0]
ld1 {v1.8b}, [x4]
mov x8, #16
_expand_picture_chroma_loop3:
st1 {v0.8b}, [x5], x1
st1 {v1.8b}, [x6], x1
sub x8, x8, #1
cbnz x8, _expand_picture_chroma_loop3
_expand_picture_chroma_end:
WELS_ASM_AARCH64_FUNC_END
#endif

View File

@ -0,0 +1,58 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifdef HAVE_NEON_AARCH64
#include "arm_arch64_common_macro.S"
//for Luma 16x16
//void WelsI16x16LumaPredV_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
// Vertical 16x16 luma intra prediction: reads the 16 bytes located one stride before pRef
// and writes that same row 16 times to pPred.
// pPred is written as 16 consecutive 16-byte rows (the store post-increments by 16, not by kiStride).
WELS_ASM_AARCH64_FUNC_BEGIN WelsI16x16LumaPredV_AArch64_neon
SIGN_EXTENSION x2,w2
// x3 = address of the row above the block
sub x3, x1, x2
ld1 {v0.16b}, [x3]
.rept 16
st1 {v0.16b}, [x0], 16
.endr
WELS_ASM_AARCH64_FUNC_END
//void WelsI16x16LumaPredH_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
// Horizontal 16x16 luma intra prediction: for each of the 16 rows, replicates the byte
// immediately left of the row in pRef across 16 bytes of pPred.
// pPred is written as 16 consecutive 16-byte rows; the source pointer steps by kiStride.
WELS_ASM_AARCH64_FUNC_BEGIN WelsI16x16LumaPredH_AArch64_neon
SIGN_EXTENSION x2,w2
// x3 = address of the pixel left of the first row
sub x3, x1, #1
.rept 16
ld1r {v0.16b}, [x3], x2
st1 {v0.16b}, [x0], 16
.endr
WELS_ASM_AARCH64_FUNC_END
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,287 @@
/*!
* \copy
* Copyright (c) 2009-2015, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file WelsList
*
* \brief for the list function needed in ThreadPool
*
* \date 9/27/2015 Created
*
*************************************************************************************
*/
#ifndef _WELS_LIST_H_
#define _WELS_LIST_H_
#include "typedefs.h"
#include <stdlib.h>
namespace WelsCommon {

/*!
 * \brief One slot of the node pool used by CWelsList.
 *
 * pPointer is the caller-supplied payload (never owned by the list); a NULL
 * pPointer marks a slot that currently holds no element.
 */
template<typename TNodeType>
struct SNode {
  TNodeType* pPointer;
  SNode* pPrevNode;
  SNode* pNextNode;
};

/*!
 * \brief Doubly linked list of TNodeType pointers backed by one malloc'ed pool.
 *
 * All slots of the pool are always chained together from m_pFirst to m_pLast:
 * the used slots come first, the free slots follow. m_pCurrent is the first
 * free slot, or NULL when every slot is in use. Removed slots are cleared and
 * re-appended at the tail so that they can be reused. The pool doubles in size
 * when an element is pushed into a full list.
 *
 * The list stores the pointers only and never frees the pointed-to objects.
 *
 * NOTE(review): the class owns m_pCurrentList but still has the implicit copy
 * constructor / assignment, so copying an instance would free the pool twice.
 */
template<typename TNodeType>
class CWelsList {
 public:
  CWelsList() {
    m_iCurrentNodeCount = 0;
    m_iMaxNodeCount = 50;

    m_pCurrentList = NULL;
    m_pFirst = NULL;
    m_pCurrent = NULL;
    m_pLast = NULL;
  };

  ~CWelsList() {
    if (m_pCurrentList) {
      free (m_pCurrentList);
    }

    m_pCurrentList = NULL;
    m_pFirst = NULL;
    m_pCurrent = NULL;
    m_pLast = NULL;
  };

  /*! \return number of elements currently stored. */
  int32_t size() {
    return m_iCurrentNodeCount;
  }

  /*!
   * \brief Appends pNode behind the last stored element.
   * \return false when pNode is NULL or when the pool cannot be allocated or grown.
   */
  bool push_back (TNodeType* pNode) {
    if (!pNode) {
      return false;
    }

    if (NULL == m_pCurrentList) {
      m_pCurrentList = static_cast<SNode<TNodeType>*> (malloc (m_iMaxNodeCount * sizeof (SNode<TNodeType>)));
      if (NULL == m_pCurrentList) {
        return false;
      } else {
        ResetStorage();
      }
    }

    // m_pCurrent == NULL means that every slot is in use
    if (NULL == m_pCurrent) {
      if (!ExpandList()) {
        return false;
      }
    }

    m_pCurrent->pPointer = pNode;
    m_pCurrent = m_pCurrent->pNextNode;
    m_iCurrentNodeCount++;

    return true;
  }

  /*! \return the first stored element, or NULL when the list is empty. */
  TNodeType* begin() {
    if (m_pFirst) {
      return m_pFirst->pPointer;
    }
    return NULL;
  }

  /*! \brief Removes the first element; does nothing on an empty list. */
  void pop_front() {
    if (m_iCurrentNodeCount == 0) {
      return;
    }

    SNode<TNodeType>* pTemp = m_pFirst;

    m_pFirst = m_pFirst->pNextNode;
    m_pFirst->pPrevNode = NULL;

    CleanOneNode (pTemp);

    // recycle the slot at the tail of the chain
    m_pLast->pNextNode = pTemp;
    pTemp->pPrevNode = m_pLast;
    m_pLast = pTemp;

    // the list was full: the recycled slot is now the only free one
    if (NULL == m_pCurrent)
      m_pCurrent = m_pLast;

    m_iCurrentNodeCount --;
  }

  /*!
   * \brief Removes the first element whose payload equals pNode.
   * \return true when an element was removed, false when pNode is not stored.
   */
  bool erase (TNodeType* pNode) {
    if (0 == m_iCurrentNodeCount) {
      return false;
    }

    SNode<TNodeType>* pTemp = m_pFirst;
    do {
      if (pNode == pTemp->pPointer) {
        if (pTemp == m_pLast) {
          // Already the tail slot (only possible when the list is full):
          // clear the payload in place. Re-linking it behind itself would
          // create a self-loop and cut the slot out of the chain.
          pTemp->pPointer = NULL;
        } else {
          if (pTemp->pPrevNode) {
            pTemp->pPrevNode->pNextNode = pTemp->pNextNode;
          } else {
            m_pFirst = pTemp->pNextNode;
          }

          if (pTemp->pNextNode) {
            pTemp->pNextNode->pPrevNode = pTemp->pPrevNode;
          }

          CleanOneNode (pTemp);

          // recycle the slot at the tail of the chain
          m_pLast->pNextNode = pTemp;
          pTemp->pPrevNode = m_pLast;
          m_pLast = pTemp;
        }

        m_iCurrentNodeCount --;

        // The list was full, so the slot just freed is the only free one.
        // Without this the next push_back would grow the pool needlessly and
        // ExpandList() would record a wrong element count.
        if (NULL == m_pCurrent) {
          m_pCurrent = m_pLast;
        }

        return true;
      }

      pTemp = pTemp->pNextNode;

    } while (pTemp && pTemp->pPointer); // stop at the first free slot
    return false;
  }

  /*! \return true when pNodeTarget is non-NULL and currently stored. */
  bool findNode (TNodeType* pNodeTarget) {
    if ((m_iCurrentNodeCount > 0) && pNodeTarget) {
      SNode<TNodeType>* pNode = m_pFirst;
      while (pNode) {
        if (pNode->pPointer == pNodeTarget) {
          return true;
        }
        pNode = pNode->pNextNode;
      }
    }
    return false;
  }

  /*!
   * \brief Returns the element at position iNodeIdx (0-based, in list order).
   * \return NULL when iNodeIdx is negative or not smaller than size().
   */
  TNodeType* getNode (int iNodeIdx) {
    if ((iNodeIdx < 0) || (iNodeIdx > m_iCurrentNodeCount - 1) || (0 == m_iCurrentNodeCount)) {
      return NULL;
    }
    SNode<TNodeType>* pNode = m_pFirst;
    for (int i = 0; i < iNodeIdx; i++) {
      if (pNode->pNextNode) {
        pNode = pNode->pNextNode;
      } else {
        return NULL;
      }
    }
    return pNode->pPointer;
  }

 private:
  /*!
   * \brief Doubles the pool and copies the payloads over in list order.
   *
   * Must only be called on a full list: the element count is set to the old
   * capacity afterwards.
   * \return false (list unchanged) when the new pool cannot be allocated.
   */
  bool ExpandList() {
    SNode<TNodeType>* tmpCurrentList = static_cast<SNode<TNodeType>*> (malloc (m_iMaxNodeCount * 2 * sizeof (
                                         SNode<TNodeType>)));
    if (tmpCurrentList == NULL) {
      return false;
    }
    InitStorage (tmpCurrentList, (m_iMaxNodeCount * 2) - 1);

    SNode<TNodeType>* pTemp = m_pFirst;
    for (int i = 0; ((i < m_iMaxNodeCount) && pTemp); i++) {
      tmpCurrentList[i].pPointer = pTemp->pPointer;
      pTemp = pTemp->pNextNode;
    }

    free (m_pCurrentList);
    m_pCurrentList = tmpCurrentList;
    m_iCurrentNodeCount = m_iMaxNodeCount;
    m_iMaxNodeCount = m_iMaxNodeCount * 2;
    m_pFirst = & (m_pCurrentList[0]);
    m_pLast = & (m_pCurrentList[m_iMaxNodeCount - 1]);
    m_pCurrent = & (m_pCurrentList[m_iCurrentNodeCount]);
    return true;
  }

  /*! \brief Chains pList[0..iMaxIndex] in array order and clears every payload. */
  void InitStorage (SNode<TNodeType>* pList, const int32_t iMaxIndex) {
    pList[0].pPrevNode = NULL;
    pList[0].pPointer = NULL;
    pList[0].pNextNode = & (pList[1]);
    for (int i = 1; i < iMaxIndex; i++) {
      pList[i].pPrevNode = & (pList[i - 1]);
      pList[i].pPointer = NULL;
      pList[i].pNextNode = & (pList[i + 1]);
    }
    pList[iMaxIndex].pPrevNode = & (pList[iMaxIndex - 1]);
    pList[iMaxIndex].pPointer = NULL;
    pList[iMaxIndex].pNextNode = NULL;
  }

  /*! \brief Clears the payload and both links of one slot. */
  void CleanOneNode (SNode<TNodeType>* pSNode) {
    pSNode->pPointer = NULL;
    pSNode->pPrevNode = NULL;
    pSNode->pNextNode = NULL;
  }

  /*! \brief Re-chains the whole pool and marks every slot as free. */
  void ResetStorage() {
    InitStorage (m_pCurrentList, m_iMaxNodeCount - 1);
    m_pCurrent = m_pCurrentList;
    m_pFirst = & (m_pCurrentList[0]);
    m_pLast = & (m_pCurrentList[m_iMaxNodeCount - 1]);
  }

 private:
  int32_t m_iCurrentNodeCount;      // number of used slots
  int32_t m_iMaxNodeCount;          // capacity of the pool, in slots
  SNode<TNodeType>* m_pCurrentList; // the malloc'ed pool (owned)
  SNode<TNodeType>* m_pFirst;       // head of the chain
  SNode<TNodeType>* m_pLast;        // tail of the chain
  SNode<TNodeType>* m_pCurrent;     // first free slot, NULL when the list is full
};

/*!
 * \brief CWelsList variant whose push_back rejects a pointer that is already stored.
 */
template<typename TNodeType>
class CWelsNonDuplicatedList : public CWelsList<TNodeType> {
 public:
  /*!
   * \return false when pNode is already in the list, otherwise the result of
   *         CWelsList::push_back (which itself rejects NULL).
   */
  bool push_back (TNodeType* pNode) {
    if (0 != this->size()) {
      if ((NULL != pNode) && (this->findNode (pNode))) {      //not checking NULL for easier testing
        return false;
      }
    }

    return CWelsList<TNodeType>::push_back (pNode);
  }

};

}
#endif

View File

@ -0,0 +1,97 @@
/*!
* \copy
* Copyright (c) 2009-2015, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file WelsLock.h
*
* \brief class wrapping for locks
*
* \date 5/09/2012 Created
*
*************************************************************************************
*/
#ifndef _WELS_LOCK_H_
#define _WELS_LOCK_H_
#include "macros.h"
#include "typedefs.h"
#include "WelsThreadLib.h"
namespace WelsCommon {
/*!
 * \brief Owns one WELS_MUTEX: initialised on construction, destroyed on destruction.
 *
 * Lock() and Unlock() forward to WelsMutexLock / WelsMutexUnlock and hand back
 * their result code unchanged. Instances are not copyable.
 */
class CWelsLock {
 public:
  CWelsLock() {
    WelsMutexInit (&m_cMutex);
  }

  virtual ~CWelsLock() {
    WelsMutexDestroy (&m_cMutex);
  }

  /*! \return the result code of WelsMutexLock on the owned mutex. */
  WELS_THREAD_ERROR_CODE Lock() {
    const WELS_THREAD_ERROR_CODE kiResult = WelsMutexLock (&m_cMutex);
    return kiResult;
  }

  /*! \return the result code of WelsMutexUnlock on the owned mutex. */
  WELS_THREAD_ERROR_CODE Unlock() {
    const WELS_THREAD_ERROR_CODE kiResult = WelsMutexUnlock (&m_cMutex);
    return kiResult;
  }

 private:
  WELS_MUTEX m_cMutex;

  DISALLOW_COPY_AND_ASSIGN (CWelsLock);
};
/*!
 * \brief Scope guard for a CWelsLock: locks in the constructor, unlocks in the destructor.
 *
 * The guard keeps a reference to the lock, so the lock must outlive the guard.
 * The result codes of Lock() / Unlock() are ignored. Instances are not copyable.
 */
class CWelsAutoLock {
  DISALLOW_COPY_AND_ASSIGN (CWelsAutoLock);
 public:
  // explicit: a CWelsLock must never be turned into a guard by an implicit conversion
  explicit CWelsAutoLock (CWelsLock& cLock) : m_cLock (cLock) {
    m_cLock.Lock();
  }

  virtual ~CWelsAutoLock() {
    m_cLock.Unlock();
  }

 private:
  CWelsLock& m_cLock;
};
}
#endif

View File

@ -0,0 +1,75 @@
/*!
* \copy
* Copyright (c) 2009-2015, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file WelsTask.h
*
* \brief Interfaces introduced in thread pool
*
* \date 5/09/2012 Created
*
*************************************************************************************
*/
#ifndef _WELS_TASK_H_
#define _WELS_TASK_H_
#include "codec_def.h"
namespace WelsCommon {

/*!
 * \brief Callback interface through which the outcome of a task is reported.
 */
class IWelsTaskSink {
 public:
  // Polymorphic interface: the virtual destructor makes deletion through an
  // IWelsTaskSink pointer well defined.
  virtual ~IWelsTaskSink() { }
  virtual int OnTaskExecuted() = 0;
  virtual int OnTaskCancelled() = 0;
};

/*!
 * \brief Base class of a unit of work; the work itself is implemented in Execute().
 *
 * The task stores the sink pointer it was created with but does not own it.
 */
class IWelsTask {
 public:
  /*! \param pSink sink associated with this task; stored as is (may be NULL). */
  explicit IWelsTask (IWelsTaskSink* pSink) : m_pSink (pSink) {
  }
  virtual ~IWelsTask() { }

  virtual int Execute() = 0;

  /*! \return the sink pointer passed to the constructor. */
  IWelsTaskSink* GetSink() const {
    return m_pSink;
  }
 protected:
  IWelsTaskSink*   m_pSink;
};

}
#endif

View File

@ -0,0 +1,83 @@
/*!
* \copy
* Copyright (c) 2009-2015, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file WelsTaskThread.h
*
* \brief connecting task and thread
*
* \date 5/09/2012 Created
*
*************************************************************************************
*/
#ifndef _WELS_TASK_THREAD_H_
#define _WELS_TASK_THREAD_H_
#include "WelsTask.h"
#include "WelsThread.h"
namespace WelsCommon {
class CWelsTaskThread;

/*!
 * \brief Callback interface notified by a CWelsTaskThread about a task.
 *
 * Both callbacks receive the reporting thread and the task concerned.
 * NOTE(review): the exact moment each callback fires is decided in the
 * CWelsTaskThread implementation, which is not part of this header.
 */
class IWelsTaskThreadSink {
 public:
  // Polymorphic interface: the virtual destructor makes deletion through an
  // IWelsTaskThreadSink pointer well defined.
  virtual ~IWelsTaskThreadSink() { }
  virtual WELS_THREAD_ERROR_CODE OnTaskStart (CWelsTaskThread* pThread, IWelsTask* pTask) = 0;
  virtual WELS_THREAD_ERROR_CODE OnTaskStop (CWelsTaskThread* pThread, IWelsTask* pTask) = 0;
};
/*!
 * \brief A CWelsThread that runs IWelsTask objects handed to it through SetTask().
 *
 * The constructor, destructor, SetTask() and ExecuteTask() are defined outside
 * this header. Instances are not copyable.
 * NOTE(review): m_cLockTask presumably guards m_pTask between SetTask() and
 * ExecuteTask() - confirm in the implementation file.
 */
class CWelsTaskThread : public CWelsThread {
 public:
  // explicit: a sink pointer must never be converted into a thread object implicitly
  explicit CWelsTaskThread (IWelsTaskThreadSink* pSink);
  virtual ~CWelsTaskThread();

  WELS_THREAD_ERROR_CODE SetTask (IWelsTask* pTask);
  virtual void ExecuteTask();

  /*! \return the identifier stored in m_uiID. */
  uintptr_t  GetID() const {
    return m_uiID;
  }

 private:
  CWelsLock  m_cLockTask;
  IWelsTaskThreadSink* m_pSink;
  IWelsTask* m_pTask;
  uintptr_t m_uiID;

  DISALLOW_COPY_AND_ASSIGN (CWelsTaskThread);
};
}
#endif

View File

@ -0,0 +1,106 @@
/*!
* \copy
* Copyright (c) 2009-2015, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file WelsThread.h
*
* \brief Interfaces introduced in threads
*
* \date 5/09/2012 Created
*
*************************************************************************************
*/
#ifndef _WELS_THREAD_H_
#define _WELS_THREAD_H_
#include "macros.h"
#include "WelsLock.h"
#include "WelsThreadLib.h"
namespace WelsCommon {
// Abstract base class of a worker thread. Derived classes implement ExecuteTask();
// the constructor, destructor, Thread(), Start(), Kill() and TheThread() are defined
// outside this header. Instances are not copyable.
class CWelsThread {
public:
CWelsThread();
virtual ~CWelsThread();
virtual void Thread();
virtual void ExecuteTask() = 0;
virtual WELS_THREAD_ERROR_CODE Start();
virtual void Kill();
// Public mutex; SignalThread() hands it to WelsEventSignal together with m_hEvent and m_iConVar.
WELS_MUTEX m_hMutex;
protected:
// Static thread entry routine; pParam is presumably the CWelsThread instance - confirm in the .cpp.
static WELS_THREAD_ROUTINE_TYPE TheThread (void* pParam);
// Both setters update their flag while holding m_cLockStatus.
void SetRunning (bool bRunning) {
CWelsAutoLock cLock (m_cLockStatus);
m_bRunning = bRunning;
}
void SetEndFlag (bool bEndFlag) {
CWelsAutoLock cLock (m_cLockStatus);
m_bEndFlag = bEndFlag;
}
// NOTE(review): the getters read the flags without taking m_cLockStatus, unlike the
// setters above; confirm that an unsynchronised read is acceptable for all callers.
bool GetRunning() const {
return m_bRunning;
}
bool GetEndFlag() const {
return m_bEndFlag;
}
// Signals m_hEvent through WelsEventSignal, passing m_hMutex and the address of m_iConVar.
void SignalThread() {
WelsEventSignal (&m_hEvent, &m_hMutex, &m_iConVar);
}
private:
WELS_THREAD_HANDLE m_hThread;
WELS_EVENT m_hEvent;
CWelsLock m_cLockStatus; // taken by SetRunning() / SetEndFlag()
bool m_bRunning;
bool m_bEndFlag;
int m_iConVar; // condition value handed to WelsEventSignal
DISALLOW_COPY_AND_ASSIGN (CWelsThread);
};
}
#endif

View File

@ -0,0 +1,151 @@
/*!
* \copy
* Copyright (c) 2009-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file WelsThreadLib.h
*
* \brief Interfaces introduced in thread programming
*
* \date 11/17/2009 Created
*
*************************************************************************************
*/
#ifndef _WELS_THREAD_API_H_
#define _WELS_THREAD_API_H_
#include "typedefs.h"
#ifdef __cplusplus
extern "C" {
#endif
// Platform mapping of the thread primitives used by the Wels thread API.
//   WELS_THREAD_HANDLE         native thread handle
//   LPWELS_THREAD_ROUTINE      pointer to a thread entry routine
//   WELS_MUTEX / WELS_EVENT    native mutex / event types
//   WELS_THREAD_ROUTINE_TYPE   return type (and calling convention) of an entry routine
//   WELS_THREAD_ROUTINE_RETURN(rc)  returns rc from an entry routine, converted to that type.
//     The argument is parenthesized so that an expression such as (a * b) is
//     converted as a whole; the expansion already ends with ';'.
#if defined(_WIN32) || defined(__CYGWIN__)
#include <windows.h>
typedef HANDLE WELS_THREAD_HANDLE;
typedef LPTHREAD_START_ROUTINE LPWELS_THREAD_ROUTINE;
typedef CRITICAL_SECTION WELS_MUTEX;
typedef HANDLE WELS_EVENT;
#define WELS_THREAD_ROUTINE_TYPE DWORD WINAPI
#define WELS_THREAD_ROUTINE_RETURN(rc) return (DWORD)(rc);
#ifdef WINAPI_FAMILY
#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
// Non-desktop API partition: redirect the classic calls to their *Ex counterparts.
#define WP80
#define InitializeCriticalSection(x) InitializeCriticalSectionEx(x, 0, 0)
#define GetSystemInfo(x) GetNativeSystemInfo(x)
#define CreateEvent(attr, reset, init, name) CreateEventEx(attr, name, ((reset) ? CREATE_EVENT_MANUAL_RESET : 0) | ((init) ? CREATE_EVENT_INITIAL_SET : 0), EVENT_ALL_ACCESS)
#define CreateSemaphore(a, b, c, d) CreateSemaphoreEx(a, b, c, d, 0, SEMAPHORE_ALL_ACCESS)
#define WaitForSingleObject(a, b) WaitForSingleObjectEx(a, b, FALSE)
#define WaitForMultipleObjects(a, b, c, d) WaitForMultipleObjectsEx(a, b, c, d, FALSE)
#endif
#endif
#else // NON-WINDOWS
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <pthread.h>
#include <semaphore.h>
#include <signal.h>
#include <errno.h>
#include <time.h>
#include <sys/time.h>
#include <sys/stat.h>
#include <fcntl.h>
typedef pthread_t WELS_THREAD_HANDLE;
typedef void* (*LPWELS_THREAD_ROUTINE) (void*);
typedef pthread_mutex_t WELS_MUTEX;
// Events are condition variables on Apple platforms and semaphore pointers elsewhere.
#ifdef __APPLE__
typedef pthread_cond_t WELS_EVENT;
#else
typedef sem_t* WELS_EVENT;
#endif
#define WELS_THREAD_ROUTINE_TYPE void *
#define WELS_THREAD_ROUTINE_RETURN(rc) return (void*)(intptr_t)(rc);
#endif//_WIN32
/* Result type returned by the Wels* thread functions declared in this header. */
typedef int32_t WELS_THREAD_ERROR_CODE;
/* Thread attribute value passed to WelsThreadCreate(). */
typedef int32_t WELS_THREAD_ATTR;
/* Output structure filled in by WelsQueryLogicalProcessInfo(). */
typedef struct _WelsLogicalProcessorInfo {
int32_t ProcessorCount;
} WelsLogicalProcessInfo;
/*
 * Error / wait result codes.
 * NOTE(review): GENERAL and WAIT_TIMEOUT are spelled as uint32_t values while
 * WELS_THREAD_ERROR_CODE is int32_t; comparisons rely on the usual integer
 * conversions. 0x102 looks like the Win32 WAIT_TIMEOUT value - confirm.
 */
#define WELS_THREAD_ERROR_OK 0
#define WELS_THREAD_ERROR_GENERAL ((uint32_t)(-1))
#define WELS_THREAD_ERROR_WAIT_OBJECT_0 0
#define WELS_THREAD_ERROR_WAIT_TIMEOUT ((uint32_t)0x00000102L)
#define WELS_THREAD_ERROR_WAIT_FAILED WELS_THREAD_ERROR_GENERAL
/*
 * Thread-library entry points. The implementations live outside this header.
 * NOTE(review): the default arguments (= NULL) and the `int&` parameter below
 * are C++-only constructs, so this header cannot be consumed from C even
 * though the declarations sit inside an extern "C" block.
 */
/* Mutex lifecycle and locking on a caller-provided WELS_MUTEX. */
WELS_THREAD_ERROR_CODE WelsMutexInit (WELS_MUTEX* mutex);
WELS_THREAD_ERROR_CODE WelsMutexLock (WELS_MUTEX* mutex);
WELS_THREAD_ERROR_CODE WelsMutexUnlock (WELS_MUTEX* mutex);
WELS_THREAD_ERROR_CODE WelsMutexDestroy (WELS_MUTEX* mutex);
/* Event creation / teardown; event_name is optional (defaults to NULL). */
WELS_THREAD_ERROR_CODE WelsEventOpen (WELS_EVENT* p_event, const char* event_name = NULL);
WELS_THREAD_ERROR_CODE WelsEventClose (WELS_EVENT* event, const char* event_name = NULL);
/*
 * Event signalling and waiting. Signal takes the condition by pointer while
 * Wait takes it by reference.
 * NOTE(review): pMutex / iCondition are presumably only used by the
 * condition-variable (Apple) backend - confirm in the implementation.
 */
WELS_THREAD_ERROR_CODE WelsEventSignal (WELS_EVENT* event,WELS_MUTEX *pMutex, int* iCondition);
WELS_THREAD_ERROR_CODE WelsEventWait (WELS_EVENT* event,WELS_MUTEX *pMutex, int& iCondition);
/* Timed wait; the parameter name indicates the timeout is given in milliseconds. */
WELS_THREAD_ERROR_CODE WelsEventWaitWithTimeOut (WELS_EVENT* event, uint32_t dwMilliseconds,WELS_MUTEX *pMutex = NULL);
/* Waits on a list of nCount events; master_event and pMutex are optional. */
WELS_THREAD_ERROR_CODE WelsMultipleEventsWaitSingleBlocking (uint32_t nCount, WELS_EVENT* event_list,
WELS_EVENT* master_event = NULL,WELS_MUTEX *pMutex = NULL);
/* Thread creation: `routine` is the entry point and `arg` its single argument. */
WELS_THREAD_ERROR_CODE WelsThreadCreate (WELS_THREAD_HANDLE* thread, LPWELS_THREAD_ROUTINE routine,
void* arg, WELS_THREAD_ATTR attr);
/* Takes only a name, no handle - presumably names the calling thread; verify. */
WELS_THREAD_ERROR_CODE WelsThreadSetName (const char* thread_name);
WELS_THREAD_ERROR_CODE WelsThreadJoin (WELS_THREAD_HANDLE thread);
WELS_THREAD_HANDLE WelsThreadSelf();
/* Fills pInfo->ProcessorCount. */
WELS_THREAD_ERROR_CODE WelsQueryLogicalProcessInfo (WelsLogicalProcessInfo* pInfo);
/* Sleep; the parameter name indicates the duration is given in milliseconds. */
void WelsSleep (uint32_t dwMilliSecond);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,124 @@
/*!
* \copy
* Copyright (c) 2009-2015, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file WelsThreadPool.h
*
* \brief Interfaces introduced in thread pool
*
* \date 5/09/2012 Created
*
*************************************************************************************
*/
#ifndef _WELS_THREAD_POOL_H_
#define _WELS_THREAD_POOL_H_
#include <stdio.h>
#include "WelsTask.h"
#include "WelsTaskThread.h"
#include "WelsList.h"
namespace WelsCommon {
// Thread pool that is itself a CWelsThread and receives task start/stop
// callbacks from its worker threads through IWelsTaskThreadSink.
//
// NOTE(review): the private constructor/destructor together with the static
// m_pThreadPoolSelf / m_iRefCount members suggest a single shared,
// reference-counted instance obtained via AddReference() and released via
// RemoveInstance() - confirm against the implementation file.
class CWelsThreadPool : public CWelsThread, public IWelsTaskThreadSink {
public:
enum {
// Presumably the initial value of m_iMaxThreadNum - confirm in the .cpp.
DEFAULT_THREAD_NUM = 4,
};
// Static configuration / lifetime management of the shared pool.
static WELS_THREAD_ERROR_CODE SetThreadNum (int32_t iMaxThreadNum);
static CWelsThreadPool* AddReference();
void RemoveInstance();
static bool IsReferenced();
// IWelsTaskThreadSink callbacks: invoked with the worker thread and its task.
virtual WELS_THREAD_ERROR_CODE OnTaskStart (CWelsTaskThread* pThread, IWelsTask* pTask);
virtual WELS_THREAD_ERROR_CODE OnTaskStop (CWelsTaskThread* pThread, IWelsTask* pTask);
// CWelsThread override.
virtual void ExecuteTask();
// Submits a task to the pool.
WELS_THREAD_ERROR_CODE QueueTask (IWelsTask* pTask);
// Returns the static thread limit m_iMaxThreadNum.
int32_t GetThreadNum() const {
return m_iMaxThreadNum;
}
protected:
// Pool setup / teardown.
WELS_THREAD_ERROR_CODE Init();
WELS_THREAD_ERROR_CODE Uninit();
// Worker-thread bookkeeping across the idle and busy collections.
WELS_THREAD_ERROR_CODE CreateIdleThread();
void DestroyThread (CWelsTaskThread* pThread);
WELS_THREAD_ERROR_CODE AddThreadToIdleQueue (CWelsTaskThread* pThread);
WELS_THREAD_ERROR_CODE AddThreadToBusyList (CWelsTaskThread* pThread);
WELS_THREAD_ERROR_CODE RemoveThreadFromBusyList (CWelsTaskThread* pThread);
// Pending-task bookkeeping.
bool AddTaskToWaitedList (IWelsTask* pTask);
CWelsTaskThread* GetIdleThread();
IWelsTask* GetWaitedTask();
int32_t GetIdleThreadNum();
int32_t GetBusyThreadNum();
int32_t GetWaitedTaskNum();
void ClearWaitedTasks();
private:
// Construction and destruction are private; see the class note above.
CWelsThreadPool();
virtual ~CWelsThreadPool();
WELS_THREAD_ERROR_CODE StopAllRunning();
// State shared by all users of the pool.
static int32_t m_iRefCount;
static int32_t m_iMaxThreadNum;
static CWelsThreadPool* m_pThreadPoolSelf;
// Pending tasks, idle workers and busy workers.
CWelsNonDuplicatedList<IWelsTask>* m_cWaitedTasks;
CWelsNonDuplicatedList<CWelsTaskThread>* m_cIdleThreads;
CWelsList<CWelsTaskThread>* m_cBusyThreads;
// NOTE(review): presumably one lock per collection above plus a pool-wide
// lock; the "Tasks" suffix on the idle/busy locks looks like it refers to
// the thread lists - verify usage in the .cpp.
CWelsLock m_cLockPool;
CWelsLock m_cLockWaitedTasks;
CWelsLock m_cLockIdleTasks;
CWelsLock m_cLockBusyTasks;
DISALLOW_COPY_AND_ASSIGN (CWelsThreadPool);
};
}
#endif

View File

@ -0,0 +1,340 @@
/*!
* \copy
* Copyright (c) 2013, Loongson Technology Co.,Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef ASMDEFS_MMI_H_
#define ASMDEFS_MMI_H_
/* Cache line size, presumably in bytes, assumed by the MMI code. */
#define CACHE_LINE_SIZE 32
/*
 * Pointer-width abstraction for inline assembly.
 *
 * When _MIPS_SIM equals _ABI64 the 64-bit ("d"-prefixed) mnemonics and an
 * 8-byte pointer size are selected; otherwise the 32-bit forms are used.
 * Every PTR_* macro is a string literal (with a trailing space) meant to be
 * concatenated into an asm template; mips_reg is the matching C integer type.
 */
#if defined(_ABI64) && _MIPS_SIM == _ABI64
# define mips_reg int64_t
# define PTRSIZE " 8 "
# define PTRLOG " 3 "
# define PTR_ADDU "daddu "
# define PTR_ADDIU "daddiu "
# define PTR_ADDI "daddi "
# define PTR_SUBU "dsubu "
# define PTR_L "ld "
# define PTR_S "sd "
# define PTR_SRA "dsra "
# define PTR_SRL "dsrl "
# define PTR_SLL "dsll "
#else
# define mips_reg int32_t
# define PTRSIZE " 4 "
# define PTRLOG " 2 "
# define PTR_ADDU "addu "
# define PTR_ADDIU "addiu "
# define PTR_ADDI "addi "
# define PTR_SUBU "subu "
# define PTR_L "lw "
# define PTR_S "sw "
# define PTR_SRA "sra "
# define PTR_SRL "srl "
# define PTR_SLL "sll "
#endif
/*
 * MMI_XSawp_{BH,HW,WD,DQ}: interleave ("swap") helpers.
 *
 * Each macro expands to a run of asm-template string literals; the register
 * arguments are stringized with '#' and spliced into the instruction text.
 * For BH/HW/WD the sequence is:
 *   f8       <- copy of f2
 *   f2, f0   <- punpckh / punpckl of (f0, f4)
 *   f10, f8  <- punpckh / punpckl of (old f2, f6)
 * so f0, f2, f8 and f10 are overwritten, while f4 and f6 are only read.
 * NOTE(review): presumably f0:f2 and f4:f6 each hold one 128-bit value split
 * over two 64-bit registers, mirroring the SSE2 punpck{l,h}* idiom - confirm
 * against the Loongson MMI documentation.
 */
/* Interleave at byte granularity (punpck?bh). */
#define MMI_XSawp_BH(f0, f2, f4, f6, f8, f10) \
"mov.d "#f8", "#f2" \n\t" \
"punpckhbh "#f2", "#f0", "#f4" \n\t" \
"punpcklbh "#f0", "#f0", "#f4" \n\t" \
"punpckhbh "#f10", "#f8", "#f6" \n\t" \
"punpcklbh "#f8", "#f8", "#f6" \n\t"
/* Interleave at halfword granularity (punpck?hw). */
#define MMI_XSawp_HW(f0, f2, f4, f6, f8, f10) \
"mov.d "#f8", "#f2" \n\t" \
"punpckhhw "#f2", "#f0", "#f4" \n\t" \
"punpcklhw "#f0", "#f0", "#f4" \n\t" \
"punpckhhw "#f10", "#f8", "#f6" \n\t" \
"punpcklhw "#f8", "#f8", "#f6" \n\t"
/* Interleave at word granularity (punpck?wd). */
#define MMI_XSawp_WD(f0, f2, f4, f6, f8, f10) \
"mov.d "#f8", "#f2" \n\t" \
"punpckhwd "#f2", "#f0", "#f4" \n\t" \
"punpcklwd "#f0", "#f0", "#f4" \n\t" \
"punpckhwd "#f10", "#f8", "#f6" \n\t" \
"punpcklwd "#f8", "#f8", "#f6" \n\t"
/*
 * Doubleword variant: pure register moves (f8 <- f2, f2 <- f4, f10 <- f6).
 * f0 is accepted for signature symmetry but not referenced.
 */
#define MMI_XSawp_DQ(f0, f2, f4, f6, f8, f10) \
"mov.d "#f8", "#f2" \n\t" \
"mov.d "#f2", "#f4" \n\t" \
"mov.d "#f10", "#f6" \n\t"
/*
 * WELS_AbsH: clears f8 (xor with itself), forms f10 = f8 - f6 and f8 = f8 - f4
 * with psubh, then writes f0 = pmaxsh(f4, f8) and f2 = pmaxsh(f6, f10).
 * Assuming dst, src1, src2 operand order this is max(x, -x), i.e. presumably a
 * per-halfword absolute value of f4:f6 stored to f0:f2. f8 and f10 are
 * clobbered.
 */
#define WELS_AbsH(f0, f2, f4, f6, f8, f10) \
"xor "#f8", "#f8", "#f8" \n\t" \
"psubh "#f10", "#f8", "#f6" \n\t" \
"psubh "#f8", "#f8", "#f4" \n\t" \
"pmaxsh "#f0", "#f4", "#f8" \n\t" \
"pmaxsh "#f2", "#f6", "#f10" \n\t"
/*
 * MMI_SumSub: saves f4:f6 into f8:f10, then
 *   f4:f6 <- f4:f6 + f0:f2     (paddh)
 *   f0:f2 <- f0:f2 - old f4:f6 (psubh)
 * f8 and f10 are clobbered as temporaries.
 */
#define MMI_SumSub(f0, f2, f4, f6, f8, f10) \
"mov.d "#f8", "#f4" \n\t" \
"mov.d "#f10", "#f6" \n\t" \
"paddh "#f4", "#f4", "#f0" \n\t" \
"paddh "#f6", "#f6", "#f2" \n\t" \
"psubh "#f0", "#f0", "#f8" \n\t" \
"psubh "#f2", "#f2", "#f10" \n\t"
/*
 * MMI_LoadDiff8P: loads one doubleword from address r0 into f0 and one from r1
 * into f4 (gsldlc1/gsldrc1 pairs at offsets 7 and 0 - presumably the unaligned
 * 8-byte load idiom), unpacks the bytes of each against f8 into halfwords
 * (f0:f2 and f4:f6), and leaves f0:f2 - f4:f6 in f0:f2.
 * NOTE(review): f8 presumably must hold zero so the unpack zero-extends the
 * bytes - confirm at the call sites. f4 and f6 are clobbered.
 */
#define MMI_LoadDiff8P(f0, f2, f4, f6, f8, r0, r1) \
"gsldlc1 "#f0", 0x7("#r0") \n\t" \
"gsldlc1 "#f4", 0x7("#r1") \n\t" \
"gsldrc1 "#f0", 0x0("#r0") \n\t" \
"gsldrc1 "#f4", 0x0("#r1") \n\t" \
"punpckhbh "#f2", "#f0", "#f8" \n\t" \
"punpcklbh "#f0", "#f0", "#f8" \n\t" \
"punpckhbh "#f6", "#f4", "#f8" \n\t" \
"punpcklbh "#f4", "#f4", "#f8" \n\t" \
"psubh "#f0", "#f0", "#f4" \n\t" \
"psubh "#f2", "#f2", "#f6" \n\t"
/*
 * MMI_TransTwo4x4H: composition of two halfword interleaves, two word
 * interleaves and two doubleword moves over the register pairs f0..f14, using
 * f16:f18 as scratch.
 * NOTE(review): going by the name this presumably transposes two 4x4 halfword
 * matrices; the location of the result rows after the final MMI_XSawp_DQ
 * steps should be confirmed at the call sites.
 */
#define MMI_TransTwo4x4H(f0, f2, f4, f6, f8, f10, f12, f14, f16, f18) \
MMI_XSawp_HW(f0, f2, f4, f6, f16, f18) \
MMI_XSawp_HW(f8, f10, f12, f14, f4, f6) \
MMI_XSawp_WD(f0, f2, f8, f10, f12, f14) \
MMI_XSawp_WD(f16, f18, f4, f6, f8, f10) \
MMI_XSawp_DQ(f0, f2, f16, f18, f4, f6) \
MMI_XSawp_DQ(f12, f14, f8, f10, f16, f18)
/*
 * MMI_TransTwo8x8B: byte, halfword, word and doubleword interleave passes over
 * the sixteen register arguments f0..f30.
 *
 * r0 and r1 are general-purpose scratch registers: between passes a register
 * pair is moved out with dmfc1 and moved back in (possibly into a different
 * pair) with dmtc1, because all FP register arguments are in use.
 * NOTE(review): going by the name this presumably transposes two 8x8 byte
 * matrices; the exact output register assignment should be confirmed at the
 * call sites. r0 and r1 are clobbered.
 */
#define MMI_TransTwo8x8B(f0, f2, f4, f6, f8, f10, f12, f14, f16, f18, f20, f22, f24, f26, f28, f30, r0, r1) \
"dmfc1 "#r0", "#f28" \n\t" \
"dmfc1 "#r1", "#f30" \n\t" \
MMI_XSawp_BH(f0, f2, f4, f6, f28, f30) \
MMI_XSawp_BH(f8, f10, f12, f14, f4, f6) \
MMI_XSawp_BH(f16, f18, f20, f22, f12, f14) \
"dmtc1 "#r0", "#f20" \n\t" \
"dmtc1 "#r1", "#f22" \n\t" \
"dmfc1 "#r0", "#f12" \n\t" \
"dmfc1 "#r1", "#f14" \n\t" \
MMI_XSawp_BH(f24, f26, f20, f22, f12, f14) \
MMI_XSawp_HW(f0, f2, f8, f10, f20, f22) \
MMI_XSawp_HW(f28, f30, f4, f6, f8, f10) \
MMI_XSawp_HW(f16, f18, f24, f26, f4, f6) \
"dmtc1 "#r0", "#f24" \n\t" \
"dmtc1 "#r1", "#f26" \n\t" \
"dmfc1 "#r0", "#f8" \n\t" \
"dmfc1 "#r1", "#f10" \n\t" \
MMI_XSawp_HW(f24, f26, f12, f14, f8, f10) \
MMI_XSawp_WD(f0, f2, f16, f18, f12, f14) \
MMI_XSawp_WD(f20, f22, f4, f6, f16, f18) \
MMI_XSawp_WD(f28, f30, f24, f26, f4, f6) \
"dmtc1 "#r0", "#f24" \n\t" \
"dmtc1 "#r1", "#f26" \n\t" \
"dmfc1 "#r0", "#f16" \n\t" \
"dmfc1 "#r1", "#f18" \n\t" \
MMI_XSawp_WD(f24, f26, f8, f10, f16, f18) \
MMI_XSawp_DQ(f0, f2, f28, f30, f8, f10) \
MMI_XSawp_DQ(f12, f14, f4, f6, f28, f30) \
MMI_XSawp_DQ(f20, f22, f24, f26, f4, f6) \
"dmtc1 "#r0", "#f24" \n\t" \
"dmtc1 "#r1", "#f26" \n\t" \
"dmfc1 "#r0", "#f0" \n\t" \
"dmfc1 "#r1", "#f2" \n\t" \
MMI_XSawp_DQ(f24, f26, f16, f18, f0, f2) \
"dmtc1 "#r0", "#f16" \n\t" \
"dmtc1 "#r1", "#f18" \n\t"
/*
 * Single-register (64-bit) helpers. As elsewhere in this header, each macro
 * expands to asm-template string literals with the register names spliced in.
 */
/* Halfword interleave of f0 and f2: high part to f4, low part to f0. */
#define MMI_XSwap_HW_SINGLE(f0, f2, f4) \
"punpckhhw "#f4", "#f0", "#f2" \n\t" \
"punpcklhw "#f0", "#f0", "#f2" \n\t"
/* Word interleave of f0 and f2: high part to f4, low part to f0. */
#define MMI_XSwap_WD_SINGLE(f0, f2, f4) \
"punpckhwd "#f4", "#f0", "#f2" \n\t" \
"punpcklwd "#f0", "#f0", "#f2" \n\t"
/*
 * Two halfword and two word interleaves over f0, f2, f4, f6 with f8 as the
 * extra register. Presumably a 4x4 halfword transpose; confirm the resulting
 * row order at the call sites.
 */
#define MMI_Trans4x4H_SINGLE(f0, f2, f4, f6, f8) \
MMI_XSwap_HW_SINGLE(f0, f2, f8) \
MMI_XSwap_HW_SINGLE(f4, f6, f2) \
MMI_XSwap_WD_SINGLE(f0, f4, f6) \
MMI_XSwap_WD_SINGLE(f8, f2, f4)
/* f4 <- f2; f2 <- f2 - f0; f0 <- f0 + old f2 (halfword add/sub). */
#define MMI_SumSub_SINGLE(f0, f2, f4) \
"mov.d "#f4", "#f2" \n\t" \
"psubh "#f2", "#f2", "#f0" \n\t" \
"paddh "#f0", "#f0", "#f4" \n\t"
/*
 * With s = shift count held in f6:
 *   f0 <- (f0 << s) + f2
 *   f2 <- f2 << s            (f2 is modified)
 *   f4 <- old f0 - (f2 << s)
 */
#define MMI_SumSubMul2_SINGLE(f0, f2, f4, f6) \
"mov.d "#f4", "#f0" \n\t" \
"psllh "#f0", "#f0", "#f6" \n\t" \
"paddh "#f0", "#f0", "#f2" \n\t" \
"psllh "#f2", "#f2", "#f6" \n\t" \
"psubh "#f4", "#f4", "#f2" \n\t"
/*
 * MMI_Copy8Times: moves GPR r0 into f0, shuffles halfwords of f0 using the
 * selector in f4, and copies the result to f2.
 * f4 should be 0x0 - presumably so that pshufh replicates halfword 0 into
 * every lane, yielding 8 copies across f0:f2.
 */
#define MMI_Copy8Times(f0, f2, f4, r0) \
"dmtc1 "#r0", "#f0" \n\t" \
"pshufh "#f0", "#f0", "#f4" \n\t" \
"mov.d "#f2", "#f0" \n\t"
/*
 * MMI_Copy16Times: same as MMI_Copy8Times but first interleaves f0 with itself
 * at byte granularity (punpcklbh), presumably duplicating the low byte so the
 * result is 16 byte copies across f0:f2.
 * f4 should be 0x0.
 */
#define MMI_Copy16Times(f0, f2, f4, r0) \
"dmtc1 "#r0", "#f0" \n\t" \
"punpcklbh "#f0", "#f0", "#f0" \n\t" \
"pshufh "#f0", "#f0", "#f4" \n\t" \
"mov.d "#f2", "#f0" \n\t"
/*
 * With s = shift count held in f6 (arithmetic right shift, psrah):
 *   f4 <- (f2 >> s) + f0
 *   f0 <- (f0 >> s) - f2
 */
#define MMI_SumSubDiv2_SINGLE(f0, f2, f4, f6) \
"psrah "#f4", "#f2", "#f6" \n\t" \
"paddh "#f4", "#f4", "#f0" \n\t" \
"psrah "#f0", "#f0", "#f6" \n\t" \
"psubh "#f0", "#f0", "#f2" \n\t"
/*
 * One butterfly stage built from MMI_SumSub_SINGLE and MMI_SumSubDiv2_SINGLE;
 * f10 is the temporary and f12 the shift count.
 * NOTE(review): presumably one 1-D pass of the 4x4 inverse transform, as the
 * name suggests - confirm against the C reference implementation.
 */
#define MMI_IDCT_SINGLE(f0, f2, f4, f6, f8, f10, f12) \
MMI_SumSub_SINGLE(f6, f8, f10) \
MMI_SumSubDiv2_SINGLE(f4, f2, f0, f12) \
MMI_SumSub_SINGLE(f0, f6, f10) \
MMI_SumSub_SINGLE(f4, f8, f10)
/*
 * MMI_StoreDiff4P_SINGLE: loads a doubleword from r1 into f2 and unpacks its
 * low bytes against f6; computes (f0 + f4) >> f8 and adds the unpacked values
 * with signed saturation (paddsh); packs to unsigned bytes (packushb) and
 * stores one word to r0 (gsswlc1/gsswrc1 at offsets 3 and 0).
 * NOTE(review): presumably f4 is a rounding constant and f6 is zero - confirm
 * at the call sites. f0 and f2 are clobbered.
 */
#define MMI_StoreDiff4P_SINGLE(f0, f2, f4, f6, r0, r1, f8) \
"gsldlc1 "#f2", 0x7("#r1") \n\t" \
"gsldrc1 "#f2", 0x0("#r1") \n\t" \
"punpcklbh "#f2", "#f2", "#f6" \n\t" \
"paddh "#f0", "#f0", "#f4" \n\t" \
"psrah "#f0", "#f0", "#f8" \n\t" \
"paddsh "#f0", "#f0", "#f2" \n\t" \
"packushb "#f0", "#f0", "#f2" \n\t" \
"gsswlc1 "#f0", 0x3("#r0") \n\t" \
"gsswrc1 "#f0", 0x0("#r0") \n\t"
/*
 * SUMH_HORIZON: adds f2 into f0 per halfword, widens the halfwords to words by
 * interleaving with f8, adds the two halves, then folds the high word onto the
 * low word. The total ends up in f0; f2 is clobbered.
 * f4 and f6 are accepted but not referenced.
 * NOTE(review): f8 presumably must be zero for the widening step - confirm.
 */
#define SUMH_HORIZON(f0, f2, f4, f6, f8) \
"paddh "#f0", "#f0", "#f2" \n\t" \
"punpckhhw "#f2", "#f0", "#f8" \n\t" \
"punpcklhw "#f0", "#f0", "#f8" \n\t" \
"paddw "#f0", "#f0", "#f2" \n\t" \
"punpckhwd "#f2", "#f0", "#f0" \n\t" \
"paddw "#f0", "#f0", "#f2" \n\t"
/*
 * LOAD_COLUMN: walks eight rows starting at address r0 with stride r1, loading
 * one doubleword per row (two rows per step, using r2 = r0 + r1 as the second
 * row pointer) and interleaving the loaded data at byte, halfword and word
 * granularity. The final results are written to f0 and f2.
 *
 * Side effects: r0 is advanced by 8 * r1; r2, f4, f8, f12 and f14 are
 * clobbered. f6 and f10 are accepted but not referenced.
 * NOTE(review): presumably gathers a vertical column of pixels into a
 * register pair - confirm. The address arithmetic uses a literal "daddu"
 * rather than PTR_ADDU, which looks 64-bit specific; verify for 32-bit ABIs.
 */
#define LOAD_COLUMN(f0, f2, f4, f6, f8, f10, f12, f14, r0, r1, r2) \
"daddu "#r2", "#r0", "#r1" \n\t" \
"gsldlc1 "#f0", 0x7("#r0") \n\t" \
"gsldlc1 "#f4", 0x7("#r2") \n\t" \
"gsldrc1 "#f0", 0x0("#r0") \n\t" \
"gsldrc1 "#f4", 0x0("#r2") \n\t" \
"punpcklbh "#f0", "#f0", "#f4" \n\t" \
"daddu "#r0", "#r2", "#r1" \n\t" \
"daddu "#r2", "#r0", "#r1" \n\t" \
"gsldlc1 "#f8", 0x7("#r0") \n\t" \
"gsldlc1 "#f4", 0x7("#r2") \n\t" \
"gsldrc1 "#f8", 0x0("#r0") \n\t" \
"gsldrc1 "#f4", 0x0("#r2") \n\t" \
"punpcklbh "#f8", "#f8", "#f4" \n\t" \
"punpckhhw "#f2", "#f0", "#f8" \n\t" \
"punpcklhw "#f0", "#f0", "#f8" \n\t" \
"daddu "#r0", "#r2", "#r1" \n\t" \
"daddu "#r2", "#r0", "#r1" \n\t" \
"gsldlc1 "#f12", 0x7("#r0") \n\t" \
"gsldlc1 "#f4", 0x7("#r2") \n\t" \
"gsldrc1 "#f12", 0x0("#r0") \n\t" \
"gsldrc1 "#f4", 0x0("#r2") \n\t" \
"punpcklbh "#f12", "#f12", "#f4" \n\t" \
"daddu "#r0", "#r2", "#r1" \n\t" \
"daddu "#r2", "#r0", "#r1" \n\t" \
"gsldlc1 "#f8", 0x7("#r0") \n\t" \
"gsldlc1 "#f4", 0x7("#r2") \n\t" \
"gsldrc1 "#f8", 0x0("#r0") \n\t" \
"gsldrc1 "#f4", 0x0("#r2") \n\t" \
"punpcklbh "#f8", "#f8", "#f4" \n\t" \
"punpckhhw "#f14", "#f12", "#f8" \n\t" \
"punpcklhw "#f12", "#f12", "#f8" \n\t" \
"daddu "#r0", "#r2", "#r1" \n\t" \
"punpcklwd "#f0", "#f2", "#f14" \n\t" \
"punpckhwd "#f2", "#f2", "#f14" \n\t"
/*
 * LOAD_COLUMN_C: four-row variant of the column load. Loads one doubleword
 * from each of four consecutive rows (start address r0, stride r1, r2 used as
 * the second row pointer), interleaves bytes pairwise and leaves the high
 * halfword interleave of the two pairs in f0.
 *
 * Side effects: r0 is advanced by 4 * r1; r2, f2 and f4 are clobbered.
 * f6 is accepted but not referenced.
 * NOTE(review): the "_C" suffix presumably stands for chroma - confirm.
 */
#define LOAD_COLUMN_C(f0, f2, f4, f6, r0, r1, r2) \
"daddu "#r2", "#r0", "#r1" \n\t" \
"gsldlc1 "#f0", 0x7("#r0") \n\t" \
"gsldlc1 "#f2", 0x7("#r2") \n\t" \
"gsldrc1 "#f0", 0x0("#r0") \n\t" \
"gsldrc1 "#f2", 0x0("#r2") \n\t" \
"punpcklbh "#f0", "#f0", "#f2" \n\t" \
"daddu "#r0", "#r2", "#r1" \n\t" \
"daddu "#r2", "#r0", "#r1" \n\t" \
"gsldlc1 "#f4", 0x7("#r0") \n\t" \
"gsldlc1 "#f2", 0x7("#r2") \n\t" \
"gsldrc1 "#f4", 0x0("#r0") \n\t" \
"gsldrc1 "#f2", 0x0("#r2") \n\t" \
"punpcklbh "#f4", "#f4", "#f2" \n\t" \
"punpckhhw "#f0", "#f0", "#f4" \n\t" \
"daddu "#r0", "#r2", "#r1" \n\t"
/**
* Back up floating-point registers.
*
* Declares a 16-byte-aligned local array `__back_temp` of 8 doubles and stores
* FP registers into it with gssqc1 (two registers per store):
*  - when _MIPS_SIM == _ABI64: $f24..$f31 (four stores, offsets 0x00..0x30);
*  - otherwise: $f20, $f22, $f24, $f26, $f28, $f30 (three stores).
* Presumably these are the callee-saved FP registers of the respective ABI -
* confirm against the ABI documentation.
*
* Because the expansion starts with a declaration followed by an if/else
* statement, the macro must be used at block scope, and RECOVER_REG must be
* used in the same scope so that it can see `__back_temp`.
*/
#define BACKUP_REG \
double __attribute__((aligned(16))) __back_temp[8]; \
if (_MIPS_SIM == _ABI64) \
__asm__ volatile ( \
"gssqc1 $f25, $f24, 0x00(%[temp]) \n\t" \
"gssqc1 $f27, $f26, 0x10(%[temp]) \n\t" \
"gssqc1 $f29, $f28, 0x20(%[temp]) \n\t" \
"gssqc1 $f31, $f30, 0x30(%[temp]) \n\t" \
: \
: [temp]"r"(__back_temp) \
: "memory" \
); \
else \
__asm__ volatile ( \
"gssqc1 $f22, $f20, 0x00(%[temp]) \n\t" \
"gssqc1 $f26, $f24, 0x10(%[temp]) \n\t" \
"gssqc1 $f30, $f28, 0x20(%[temp]) \n\t" \
: \
: [temp]"r"(__back_temp) \
: "memory" \
);
/**
* Restore floating-point registers.
*
* Reloads, with gslqc1, the same registers from the same `__back_temp` offsets
* that BACKUP_REG stored. It does not declare `__back_temp` itself, so
* BACKUP_REG must have been expanded earlier in an enclosing scope.
*
* NOTE(review): the expansion is a bare if/else statement that already ends in
* ';'. Using it as the unbraced body of another if/else would mis-associate
* the else branch - keep it as a standalone statement or wrap it in braces.
*/
#define RECOVER_REG \
if (_MIPS_SIM == _ABI64) \
__asm__ volatile ( \
"gslqc1 $f25, $f24, 0x00(%[temp]) \n\t" \
"gslqc1 $f27, $f26, 0x10(%[temp]) \n\t" \
"gslqc1 $f29, $f28, 0x20(%[temp]) \n\t" \
"gslqc1 $f31, $f30, 0x30(%[temp]) \n\t" \
: \
: [temp]"r"(__back_temp) \
: "memory" \
); \
else \
__asm__ volatile ( \
"gslqc1 $f22, $f20, 0x00(%[temp]) \n\t" \
"gslqc1 $f26, $f24, 0x10(%[temp]) \n\t" \
"gslqc1 $f30, $f28, 0x20(%[temp]) \n\t" \
: \
: [temp]"r"(__back_temp) \
: "memory" \
);
/*
 * Generic success / failure values.
 * NOTE(review): these very short, unprefixed names are visible to every file
 * that includes this header and can collide with other libraries' macros.
 */
# define OK 1
# define NOTOK 0
#endif /* ASMDEFS_MMI_H_ */

View File

@ -0,0 +1,96 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef WELS_COPY_MB_H_
#define WELS_COPY_MB_H_
#include "typedefs.h"
/****************************************************************************
* Copy functions
*
* Every function shares one signature:
*   pDst / iStrideD - destination pointer and its stride
*   pSrc / iStrideS - source pointer and its stride
* The NxM in each name presumably gives the block width x height in pixels,
* and strides are presumably in bytes - confirm against the implementations.
****************************************************************************/
/* Portable C implementations (declared with C++ linkage, outside extern "C"). */
void WelsCopy4x4_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
void WelsCopy8x4_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
void WelsCopy4x8_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
void WelsCopy8x8_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
void WelsCopy8x16_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); //
void WelsCopy16x8_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); //
void WelsCopy16x16_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
/* Architecture-specific variants, declared with C linkage. */
#if defined(__cplusplus)
extern "C" {
#endif//__cplusplus
/* x86 (MMX / SSE2). The "NotAligned" variants presumably tolerate unaligned pointers. */
#if defined (X86_ASM)
void WelsCopy8x8_mmx (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
void WelsCopy8x16_mmx (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
void WelsCopy16x8NotAligned_sse2 (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t iStrideS);
void WelsCopy16x16_sse2 (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t iStrideS);
void WelsCopy16x16NotAligned_sse2 (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t iStrideS);
#endif//X86_ASM
/* 32-bit ARM NEON. */
#if defined (HAVE_NEON)
void WelsCopy8x8_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
void WelsCopy16x16_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
void WelsCopy16x16NotAligned_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
void WelsCopy16x8NotAligned_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
void WelsCopy8x16_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
#endif
/* AArch64 NEON. */
#if defined (HAVE_NEON_AARCH64)
void WelsCopy8x8_AArch64_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
void WelsCopy16x16_AArch64_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
void WelsCopy16x16NotAligned_AArch64_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
void WelsCopy16x8NotAligned_AArch64_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
void WelsCopy8x16_AArch64_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
#endif
/* Loongson MMI. */
#if defined (HAVE_MMI)
void WelsCopy8x8_mmi (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
void WelsCopy8x16_mmi (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
void WelsCopy16x8NotAligned_mmi (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t iStrideS);
void WelsCopy16x16_mmi (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t iStrideS);
void WelsCopy16x16NotAligned_mmi (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t iStrideS);
#endif//HAVE_MMI
/* MIPS MSA. */
#if defined (HAVE_MSA)
void WelsCopy8x8_msa (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
void WelsCopy8x16_msa (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
void WelsCopy16x8_msa (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t iStrideS);
void WelsCopy16x16_msa (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t iStrideS);
#endif//HAVE_MSA
#if defined(__cplusplus)
}
#endif//__cplusplus
#endif //WELS_COPY_MB_H_

View File

@ -0,0 +1,80 @@
/*!
* \copy
* Copyright (c) 2009-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file cpu.h
*
* \brief CPU feature compatibility detection
*
* \date 04/29/2009 Created
*
*************************************************************************************
*/
#if !defined(WELS_CPU_DETECTION_H__)
#define WELS_CPU_DETECTION_H__
#include "typedefs.h"
#include "cpu_core.h"
#if defined(__cplusplus)
extern "C" {
#endif//__cplusplus
#if defined(X86_ASM)
/*
* Verifies CPUID support.
* Returns 0 if the CPUID instruction is not supported by the CPU.
*/
int32_t WelsCPUIdVerify();
/*
* Executes CPUID for leaf uiIndex and stores the four result values through
* pFeatureA..pFeatureD (presumably EAX, EBX, ECX, EDX in that order - confirm
* against the assembly implementation).
*/
void WelsCPUId (uint32_t uiIndex, uint32_t* pFeatureA, uint32_t* pFeatureB, uint32_t* pFeatureC, uint32_t* pFeatureD);
/* AVX / FMA availability checks, given the eax and ecx register values. */
int32_t WelsCPUSupportAVX (uint32_t eax, uint32_t ecx);
int32_t WelsCPUSupportFMA (uint32_t eax, uint32_t ecx);
/* Presumably issues the x86 EMMS instruction - confirm in the assembly source. */
void WelsEmms();
/*
* Clears FPU register state for potential float-based calculation, if supported.
*/
void WelsCPURestore (const uint32_t kuiCPU);
#else
/* Without X86_ASM, WelsEmms() expands to nothing so call sites compile unchanged. */
#define WelsEmms()
#endif
/*
* Detects CPU features and returns them as a bit mask (see the WELS_CPU_* flags
* in cpu_core.h). pNumberOfLogicProcessors is an output parameter - presumably
* it receives the logical processor count; confirm whether NULL is accepted.
*/
uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors);
#if defined(__cplusplus)
}
#endif//__cplusplus
#endif//WELS_CPU_DETECTION_H__

View File

@ -0,0 +1,95 @@
/*!
* \copy
* Copyright (c) 2009-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file cpu_core.h
*
* \brief cpu core feature detection
*
* \date 4/24/2009 Created
*
*************************************************************************************
*/
#if !defined(WELS_CPU_CORE_FEATURE_DETECTION_H__)
#define WELS_CPU_CORE_FEATURE_DETECTION_H__
/*
* WELS CPU feature flags
*
* Each flag is a single bit intended to be OR-ed into a 32-bit feature mask.
* The x86, ARM and Loongson groups below reuse the same low bit values
* (e.g. WELS_CPU_MMX, WELS_CPU_ARMv7 and WELS_CPU_MMI are all 0x1), so a mask
* can only be interpreted for the architecture it was produced on.
*/
#define WELS_CPU_MMX 0x00000001 /* mmx */
#define WELS_CPU_MMXEXT 0x00000002 /* mmx-ext*/
#define WELS_CPU_SSE 0x00000004 /* sse */
#define WELS_CPU_SSE2 0x00000008 /* sse 2 */
#define WELS_CPU_SSE3 0x00000010 /* sse 3 */
#define WELS_CPU_SSE41 0x00000020 /* sse 4.1 */
#define WELS_CPU_3DNOW 0x00000040 /* 3dnow! */
#define WELS_CPU_3DNOWEXT 0x00000080 /* 3dnow! ext */
#define WELS_CPU_ALTIVEC 0x00000100 /* altivec */
#define WELS_CPU_SSSE3 0x00000200 /* ssse3 */
#define WELS_CPU_SSE42 0x00000400 /* sse 4.2 */
/* CPU features application extensive */
#define WELS_CPU_FPU 0x00001000 /* x87-FPU on chip */
#define WELS_CPU_HTT 0x00002000 /* Hyper-Threading Technology (HTT), Multi-threading enabled feature:
physical processor package is capable of supporting more than one logical processor
*/
#define WELS_CPU_CMOV 0x00004000 /* Conditional Move Instructions,
also if x87-FPU is present as indicated by the CPUID.FPU feature bit, then FCOMI and FCMOV are supported
*/
#define WELS_CPU_MOVBE 0x00008000 /* MOVBE instruction */
#define WELS_CPU_AES 0x00010000 /* AES instruction extensions */
#define WELS_CPU_FMA 0x00020000 /* AVX VEX FMA instruction sets */
#define WELS_CPU_AVX 0x00000800 /* Advanced Vector eXtensions */
/* Without HAVE_AVX2 the flag is 0, so any `mask & WELS_CPU_AVX2` test is always false. */
#ifdef HAVE_AVX2
#define WELS_CPU_AVX2 0x00040000 /* AVX2 */
#else
#define WELS_CPU_AVX2 0x00000000 /* !AVX2 */
#endif
#define WELS_CPU_CACHELINE_16 0x10000000 /* CacheLine Size 16 */
#define WELS_CPU_CACHELINE_32 0x20000000 /* CacheLine Size 32 */
#define WELS_CPU_CACHELINE_64 0x40000000 /* CacheLine Size 64 */
#define WELS_CPU_CACHELINE_128 0x80000000 /* CacheLine Size 128 */
/* For the android OS (ARM); values overlap the x86 flags above */
#define WELS_CPU_ARMv7 0x000001 /* ARMv7 */
#define WELS_CPU_VFPv3 0x000002 /* VFPv3 */
#define WELS_CPU_NEON 0x000004 /* NEON */
/* For loongson; values overlap the x86 and ARM flags above */
#define WELS_CPU_MMI 0x00000001 /* mmi */
#define WELS_CPU_MSA 0x00000002 /* msa */
/*
* Interfaces for CPU core feature detection as below
*/
#endif//WELS_CPU_CORE_FEATURE_DETECTION_H__

View File

@ -0,0 +1,101 @@
/*!
* \copy
* Copyright (c) 2010-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file crt_util_safe_x.h
*
* \brief Safe CRT-like utilities for cross-platform support
*
* \date 06/04/2010 Created
*
*************************************************************************************
*/
#ifndef WELS_CRT_UTIL_SAFE_CROSS_PLATFORMS_H__
#define WELS_CRT_UTIL_SAFE_CROSS_PLATFORMS_H__
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stdio.h>
#include <math.h>
#include <time.h>
#if defined(_WIN32)
#include <windows.h>
#include <sys/types.h>
#include <sys/timeb.h>
#else
#include <sys/time.h>
#include "typedefs.h"
#endif//_WIN32
#include "typedefs.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Seek origins that map one-to-one onto the C stdio SEEK_* constants;
 * presumably intended for the `origin` argument of WelsFseek().
 */
#define WELS_FILE_SEEK_SET SEEK_SET
#define WELS_FILE_SEEK_CUR SEEK_CUR
#define WELS_FILE_SEEK_END SEEK_END
/* Historical misspelling ("WESL"); kept as an alias so existing callers still compile. */
#define WESL_FILE_SEEK_END WELS_FILE_SEEK_END
/* File handle used by the WelsF* wrappers: a plain stdio FILE. */
typedef FILE WelsFileHandle;
/*
 * Time value used by WelsGetTimeOfDay / WelsStrftime / WelsGetMillisecond.
 * On Windows it is the CRT's struct _timeb; elsewhere it is a minimal struct
 * with the same two field names (time, millitm), so code accessing those
 * fields compiles on both.
 */
#ifdef _WIN32
typedef struct _timeb SWelsTime;
#else
typedef struct TagWelsTime {
time_t time;
unsigned short millitm;
} SWelsTime;
#endif
/*
 * Bounded string helpers. Unlike strncpy/strncat, the buffer size is the
 * SECOND argument here. Note that WelsStrcat takes the size as uint32_t while
 * the other helpers use int32_t.
 */
int32_t WelsSnprintf (char* buffer, int32_t sizeOfBuffer, const char* format, ...);
char* WelsStrncpy (char* dest, int32_t sizeInBytes, const char* src);
char* WelsStrcat (char* dest, uint32_t sizeInBytes, const char* src);
int32_t WelsVsnprintf (char* buffer, int32_t sizeOfBuffer, const char* format, va_list argptr);
/*
 * stdio-style file wrappers operating on WelsFileHandle (a FILE).
 * Sizes, counts and offsets are int32_t rather than size_t / long.
 */
WelsFileHandle* WelsFopen (const char* filename, const char* mode);
int32_t WelsFclose (WelsFileHandle* fp);
int32_t WelsFread (void* buffer, int32_t size, int32_t count, WelsFileHandle* fp);
int32_t WelsFwrite (const void* buffer, int32_t size, int32_t count, WelsFileHandle* fp);
int32_t WelsFseek (WelsFileHandle* fp, int32_t offset, int32_t origin);
int32_t WelsFflush (WelsFileHandle* fp);
/* Time helpers built around SWelsTime. */
int32_t WelsGetTimeOfDay (SWelsTime* tp);
int32_t WelsStrftime (char* buffer, int32_t size, const char* format, const SWelsTime* tp);
uint16_t WelsGetMillisecond (const SWelsTime* tp);
#ifdef __cplusplus
}
#endif
#endif//WELS_CRT_UTIL_SAFE_CROSS_PLATFORMS_H__

View File

@ -0,0 +1,112 @@
#ifndef WELS_DEBLOCKING_COMMON_H__
#define WELS_DEBLOCKING_COMMON_H__
#include "typedefs.h"
/*
 * Portable C deblocking filters (declared with C++ linkage).
 *
 * Naming, as far as it can be read from the signatures:
 *   Luma / Chroma - plane being filtered
 *   Lt4 / Eq4     - the Lt4 variants take an extra pTc array, the Eq4 variants
 *                   do not (presumably boundary strength < 4 vs == 4 as in
 *                   H.264 - confirm)
 *   V / H         - presumably the filtering direction - confirm
 * iStride is the plane stride; iAlpha and iBeta are the filter thresholds.
 */
void DeblockLumaLt4V_c (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
void DeblockLumaEq4V_c (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockLumaLt4H_c (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
void DeblockLumaEq4H_c (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
/* Chroma variants taking separate Cb and Cr plane pointers. */
void DeblockChromaLt4V_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
int8_t* pTc);
void DeblockChromaEq4V_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockChromaLt4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
int8_t* pTc);
void DeblockChromaEq4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
/* "2" variants take a single chroma pointer (pPixCbCr) instead of a Cb/Cr pair. */
void DeblockChromaLt4V2_c (uint8_t* pPixCbCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
int8_t* pTc);
void DeblockChromaEq4V2_c (uint8_t* pPixCbCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockChromaLt4H2_c (uint8_t* pPixCbCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
int8_t* pTc);
void DeblockChromaEq4H2_c (uint8_t* pPixCbCr,int32_t iStride, int32_t iAlpha, int32_t iBeta);
/* Operates in place on the pNonZeroCount array; see the implementation for the exact transform. */
void WelsNonZeroCount_c (int8_t* pNonZeroCount);
#if defined(__cplusplus)
extern "C" {
#endif//__cplusplus
// Architecture-specific counterparts of the "_c" routines above. Each group is
// only declared when the matching build macro is defined; all have C linkage
// because they sit inside the surrounding extern "C" block.
#ifdef X86_ASM
// x86 SSE2/SSSE3 versions, plus luma transpose helpers (H2V / V2H).
void DeblockLumaLt4V_ssse3 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
void DeblockLumaEq4V_ssse3 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockLumaTransposeH2V_sse2 (uint8_t* pPixY, int32_t iStride, uint8_t* pDst);
void DeblockLumaTransposeV2H_sse2 (uint8_t* pPixY, int32_t iStride, uint8_t* pSrc);
void DeblockLumaLt4H_ssse3 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
void DeblockLumaEq4H_ssse3 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockChromaEq4V_ssse3 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockChromaLt4V_ssse3 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
int8_t* pTC);
void DeblockChromaEq4H_ssse3 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockChromaLt4H_ssse3 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
int8_t* pTC);
void WelsNonZeroCount_sse2 (int8_t* pNonZeroCount);
#endif
#if defined(HAVE_NEON)
// NEON versions (HAVE_NEON builds).
void DeblockLumaLt4V_neon (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
void DeblockLumaEq4V_neon (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockLumaLt4H_neon (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
void DeblockLumaEq4H_neon (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockChromaLt4V_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
int8_t* pTC);
void DeblockChromaEq4V_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockChromaLt4H_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
int8_t* pTC);
void DeblockChromaEq4H_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void WelsNonZeroCount_neon (int8_t* pNonZeroCount);
#endif
#if defined(HAVE_NEON_AARCH64)
// AArch64 NEON versions (HAVE_NEON_AARCH64 builds).
void DeblockLumaLt4V_AArch64_neon (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
void DeblockLumaEq4V_AArch64_neon (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockLumaLt4H_AArch64_neon (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
void DeblockLumaEq4H_AArch64_neon (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockChromaLt4V_AArch64_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
int8_t* pTC);
void DeblockChromaEq4V_AArch64_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockChromaLt4H_AArch64_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
int8_t* pTC);
void DeblockChromaEq4H_AArch64_neon (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void WelsNonZeroCount_AArch64_neon (int8_t* pNonZeroCount);
#endif
#if defined(HAVE_MMI)
// MMI versions (HAVE_MMI builds), including luma transpose helpers.
void DeblockLumaLt4V_mmi (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
void DeblockLumaEq4V_mmi (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockLumaTransposeH2V_mmi (uint8_t* pPixY, int32_t iStride, uint8_t* pDst);
void DeblockLumaTransposeV2H_mmi (uint8_t* pPixY, int32_t iStride, uint8_t* pSrc);
void DeblockLumaLt4H_mmi (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
void DeblockLumaEq4H_mmi (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockChromaEq4V_mmi (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockChromaLt4V_mmi (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
int8_t* pTC);
void DeblockChromaEq4H_mmi (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockChromaLt4H_mmi (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
int8_t* pTC);
void WelsNonZeroCount_mmi (int8_t* pNonZeroCount);
#endif//HAVE_MMI
#if defined(HAVE_MSA)
// MSA versions (HAVE_MSA builds).
void DeblockLumaLt4V_msa (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
void DeblockLumaEq4V_msa (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockLumaLt4H_msa (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
void DeblockLumaEq4H_msa (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockChromaEq4V_msa (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockChromaLt4V_msa (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
int8_t* pTC);
void DeblockChromaEq4H_msa (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockChromaLt4H_msa (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
int8_t* pTC);
void WelsNonZeroCount_msa (int8_t* pNonZeroCount);
#endif//HAVE_MSA
#if defined(__cplusplus)
}
#endif//__cplusplus
#endif //WELS_DEBLOCKING_COMMON_H__

View File

@ -0,0 +1,107 @@
/*!
* \copy
* Copyright (c) 2009-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file expand_pic.h
*
* \brief Interface for expanding reconstructed picture to be used for reference
*
* \date 06/08/2009
*************************************************************************************
*/
#ifndef EXPAND_PICTURE_H
#define EXPAND_PICTURE_H
#include "typedefs.h"
#if defined(__cplusplus)
extern "C" {
#endif//__cplusplus
// Border sizes used when extending a reference picture; the chroma value is
// half the luma value. Units are presumably pixels per side -- confirm in
// ExpandReferencingPicture.
#define PADDING_LENGTH 32 // reference extension
#define CHROMA_PADDING_LENGTH 16 // chroma reference extension
// Architecture-specific picture-expansion routines. Every function shares the
// PExpandPictureFunc signature (pDst, stride, picture width, picture height).
// x86 and MMI provide separate "Align"/"Unalign" chroma variants; the NEON
// builds provide a single chroma routine.
#if defined(X86_ASM)
void ExpandPictureLuma_sse2 (uint8_t* pDst,
const int32_t kiStride,
const int32_t kiPicW,
const int32_t kiPicH);
void ExpandPictureChromaAlign_sse2 (uint8_t* pDst,
const int32_t kiStride,
const int32_t kiPicW,
const int32_t kiPicH);
void ExpandPictureChromaUnalign_sse2 (uint8_t* pDst,
const int32_t kiStride,
const int32_t kiPicW,
const int32_t kiPicH);
#endif//X86_ASM
#if defined(HAVE_NEON)
void ExpandPictureLuma_neon (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW, const int32_t kiPicH);
void ExpandPictureChroma_neon (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW, const int32_t kiPicH);
#endif
#if defined(HAVE_NEON_AARCH64)
void ExpandPictureLuma_AArch64_neon (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW, const int32_t kiPicH);
void ExpandPictureChroma_AArch64_neon (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW,
const int32_t kiPicH);
#endif
#if defined(HAVE_MMI)
void ExpandPictureLuma_mmi (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW,
const int32_t kiPicH);
void ExpandPictureChromaAlign_mmi (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW,
const int32_t kiPicH);
void ExpandPictureChromaUnalign_mmi (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW,
const int32_t kiPicH);
#endif//HAVE_MMI
// Signature shared by every picture-expansion routine declared in this header.
typedef void (*PExpandPictureFunc) (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW, const int32_t kiPicH);
// Dispatch table filled in by InitExpandPictureFunc.
typedef struct TagExpandPicFunc {
PExpandPictureFunc pfExpandLumaPicture; // luma plane routine
// Two chroma routines; presumably one per alignment case, matching the
// ChromaAlign/ChromaUnalign variants -- confirm the index meaning in InitExpandPictureFunc.
PExpandPictureFunc pfExpandChromaPicture[2];
} SExpandPicFunc;
// Per-macroblock padding helpers. All arguments are passed by reference
// (C++ only, even though these declarations sit inside extern "C").
void PadMBLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW, const int32_t& kiPicH,
const int32_t& kiMbX, const int32_t& kiMbY, const int32_t& kiMBWidth, const int32_t& kiMBHeight);
void PadMBChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW, const int32_t& kiPicH,
const int32_t& kiMbX, const int32_t& kiMbY, const int32_t& kiMBWidth, const int32_t& kiMBHeight);
// Whole-picture expansion: takes three plane pointers and strides plus the
// luma routine and the two chroma routines to apply.
void ExpandReferencingPicture (uint8_t* pData[3], int32_t iWidth, int32_t iHeight, int32_t iStride[3],
PExpandPictureFunc pExpLuma, PExpandPictureFunc pExpChrom[2]);
// Fills pExpandPicFunc; kuiCPUFlags presumably selects among the SIMD variants above.
void InitExpandPictureFunc (SExpandPicFunc* pExpandPicFunc, const uint32_t kuiCPUFlags);
#if defined(__cplusplus)
}
#endif//__cplusplus
#endif

View File

@ -0,0 +1,166 @@
/*!
* \copy
* Copyright (c) 2009-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file golomb_common.h
*
* \brief Exponential Golomb entropy coding/decoding routine
*
* \date 03/12/2015 Created
*
*************************************************************************************
*/
#ifndef EXPONENTIAL_GOLOMB_ENTROPY_CODING_COMMON_H__
#define EXPONENTIAL_GOLOMB_ENTROPY_CODING_COMMON_H__
#include "typedefs.h"
namespace WelsCommon {
/*
 * Store the 32-bit value `val` into ptr[0..3], most significant byte first.
 * Each argument is expanded four times, so neither should have side effects.
 * The narrowing to the element type of `ptr` is implicit.
 */
#define WRITE_BE_32(ptr, val) do { \
(ptr)[0] = (val) >> 24; \
(ptr)[1] = (val) >> 16; \
(ptr)[2] = (val) >> 8; \
(ptr)[3] = (val) >> 0; \
} while (0)
/************************************************************************/
/* GOLOMB CODING FOR WELS COMMON */
/************************************************************************/
/*!
 * \brief   reset a bit-string writer so that it starts at the head of a buffer
 *
 * \param   pBs     bit string auxiliary structure to initialize
 * \param   kpBuf   start of the bit-stream buffer (constness is cast away: the
 *                  writer functions below store through pCurBuf)
 * \param   kiSize  buffer size; it is added to the byte pointer kpBuf to form
 *                  pEndBuf, i.e. it is used as a byte count here
 *
 * \return  kiSize, unchanged (this function has no failure path)
 */
static inline int32_t InitBits (SBitStringAux* pBs, const uint8_t* kpBuf, const int32_t kiSize) {
  uint8_t* const pBase = const_cast<uint8_t*> (kpBuf);

  // Empty accumulator: no bits cached, all 32 bit positions free.
  pBs->uiCurBits = 0;
  pBs->iLeftBits = 32;

  pBs->pStartBuf = pBase;
  pBs->pCurBuf   = pBase;
  pBs->pEndBuf   = pBase + kiSize;
  return kiSize;
}
/*
 * Append iLen bits of kuiValue to the bit string.
 *
 * Bits are collected in pBitString->uiCurBits; iLeftBits counts the free bit
 * positions left in that accumulator. Once the accumulator is full, it is
 * stored big-endian at pCurBuf (4 bytes) and the remaining low bits of
 * kuiValue start a new accumulator.
 *
 * pBitString  writer state (see InitBits)
 * iLen        number of bits to write; the code handles 0..32
 * kuiValue    value to write. In the "fits" path it is OR-ed in unmasked, so
 *             it must not have bits set above iLen.
 *
 * Always returns 0. No check against pEndBuf is made here.
 *
 * NOTE(review): assumes uiCurBits is a 32-bit unsigned accumulator, as the
 * WRITE_BE_32 usage implies -- confirm against SBitStringAux.
 */
static inline int32_t BsWriteBits (PBitStringAux pBitString, int32_t iLen, const uint32_t kuiValue) {
  if (iLen < pBitString->iLeftBits) {
    // Everything fits into the current accumulator.
    pBitString->uiCurBits = (pBitString->uiCurBits << iLen) | kuiValue;
    pBitString->iLeftBits -= iLen;
  } else {
    const int32_t kiFree = pBitString->iLeftBits;
    iLen -= kiFree;   // bits that spill into the next accumulator (0..31)
    // Shifting a 32-bit value by 32 is undefined; with no cached bits to keep
    // (kiFree == 32) the shifted-out accumulator contributes nothing.
    const uint32_t kuiKept = (kiFree < 32) ? (pBitString->uiCurBits << kiFree) : 0;
    pBitString->uiCurBits = kuiKept | (kuiValue >> iLen);
    WRITE_BE_32 (pBitString->pCurBuf, pBitString->uiCurBits);
    pBitString->pCurBuf += 4;
    // Unsigned mask: (1 << 31) - 1 on a signed int overflows when iLen == 31.
    pBitString->uiCurBits = kuiValue & ((1u << iLen) - 1);
    pBitString->iLeftBits = 32 - iLen;
  }
  return 0;
}
/*
 * Write a single bit (kuiValue is passed through as a 1-bit field).
 * Always returns 0.
 */
static inline int32_t BsWriteOneBit (PBitStringAux pBitString, const uint32_t kuiValue) {
  const int32_t kiOneBit = 1;
  (void) BsWriteBits (pBitString, kiOneBit, kuiValue);
  return 0;
}
/*
 * Flush the cached bits to the buffer.
 *
 * The accumulator is left-aligned (pending bits first, zero padding after),
 * stored big-endian at pCurBuf (always 4 bytes), and pCurBuf then advances by
 * 4 - iLeftBits / 8 bytes. The accumulator is reset to empty afterwards.
 * Always returns 0. No check against pEndBuf is made here.
 *
 * NOTE(review): assumes uiCurBits is a 32-bit unsigned accumulator, as the
 * WRITE_BE_32 usage implies -- confirm against SBitStringAux.
 */
static inline int32_t BsFlush (PBitStringAux pBitString) {
  const int32_t kiFree = pBitString->iLeftBits;
  // With nothing pending (kiFree == 32) a shift by 32 would be undefined; the
  // left-aligned word is simply zero in that case.
  const uint32_t kuiAligned = (kiFree < 32) ? (pBitString->uiCurBits << kiFree) : 0;
  WRITE_BE_32 (pBitString->pCurBuf, kuiAligned);
  pBitString->pCurBuf += 4 - kiFree / 8;
  pBitString->iLeftBits = 32;
  pBitString->uiCurBits = 0;
  return 0;
}
/*
 * Write an unsigned Exp-Golomb code for kuiValue.
 *
 * The emitted field is always kuiValue + 1. For kuiValue < 256 its length is
 * read directly from g_kuiGolombUELength[kuiValue]; for larger values the
 * length is derived by shifting kuiValue + 1 down into table range and adding
 * back the shifted-out bit count.
 * Always returns 0.
 */
static inline int32_t BsWriteUE (PBitStringAux pBitString, const uint32_t kuiValue) {
  const uint32_t kuiCodeNum = kuiValue + 1;

  if (kuiValue < 256) {
    BsWriteBits (pBitString, g_kuiGolombUELength[kuiValue], kuiCodeNum);
    return 0;
  }

  uint32_t uiReduced = kuiCodeNum;  // kuiCodeNum shifted down into table range
  uint32_t uiHalfLen = 0;           // ends up as (code length - 1) / 2
  if (uiReduced & 0xffff0000) {
    uiReduced >>= 16;
    uiHalfLen += 16;
  }
  if (uiReduced & 0xff00) {
    uiReduced >>= 8;
    uiHalfLen += 8;
  }
  // The table is indexed by value, i.e. by code number minus one.
  uiHalfLen += (g_kuiGolombUELength[uiReduced - 1] >> 1);
  BsWriteBits (pBitString, (uiHalfLen << 1) + 1, kuiCodeNum);
  return 0;
}
/*
 * Write a signed Exp-Golomb code for kiValue.
 *
 * Mapping onto the unsigned code: 0 is written as the single bit '1',
 * a positive v is written as ue(2v - 1), a negative v as ue(-2v).
 * Always returns 0.
 */
static inline int32_t BsWriteSE (PBitStringAux pBitString, const int32_t kiValue) {
  if (kiValue > 0) {
    const uint32_t kuiMapped = (kiValue << 1) - 1;
    BsWriteUE (pBitString, kuiMapped);
  } else if (kiValue < 0) {
    const uint32_t kuiMapped = ((-kiValue) << 1);
    BsWriteUE (pBitString, kuiMapped);
  } else {
    BsWriteOneBit (pBitString, 1);
  }
  return 0;
}
/*
 * Write RBSP trailing bits: a single '1' stop bit, then flush the
 * accumulator (BsFlush pads the remainder with zero bits).
 * Always returns 0.
 */
static inline int32_t BsRbspTrailingBits (PBitStringAux pBitString) {
  const uint32_t kuiStopBit = 1;
  BsWriteOneBit (pBitString, kuiStopBit);
  BsFlush (pBitString);
  return 0;
}
}
#endif//EXPONENTIAL_GOLOMB_ENTROPY_CODING_COMMON_H__

View File

@ -0,0 +1,81 @@
/*!
* \copy
* Copyright (c) 2009-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file intra_pred_common.h
*
* \brief interfaces for intra predictor about 16x16.
*
* \date 4/2/2014 Created
*
*************************************************************************************
*/
#ifndef INTRA_PRED_COMMON_H
#define INTRA_PRED_COMMON_H
#include "typedefs.h"
// Portable ("_c") 16x16 luma intra predictors, V and H variants (presumably
// vertical / horizontal prediction modes). Declared before the extern "C"
// section, so they keep C++ linkage.
void WelsI16x16LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
void WelsI16x16LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
#if defined(__cplusplus)
extern "C" {
#endif//__cplusplus
// Architecture-specific versions of the two "_c" predictors, with identical
// signatures; each pair is only declared when its build macro is defined.
#if defined(X86_ASM)
//for intra-prediction ASM functions
void WelsI16x16LumaPredV_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
void WelsI16x16LumaPredH_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
#endif//X86_ASM
#if defined(HAVE_NEON)
void WelsI16x16LumaPredV_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
void WelsI16x16LumaPredH_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
#endif//HAVE_NEON
#if defined(HAVE_NEON_AARCH64)
void WelsI16x16LumaPredV_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
void WelsI16x16LumaPredH_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
#endif//HAVE_NEON_AARCH64
#if defined(HAVE_MMI)
void WelsI16x16LumaPredV_mmi (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
void WelsI16x16LumaPredH_mmi (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
#endif//HAVE_MMI
#if defined(__cplusplus)
}
#endif//__cplusplus
#endif//

View File

@ -0,0 +1,130 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef ___LD_ST_MACROS___
#define ___LD_ST_MACROS___
#include <string.h>
#include "typedefs.h"
#ifdef __GNUC__
// Single-member wrapper structs used for type-punned loads/stores on GNU
// compilers. `packed` drops the member's natural alignment requirement and
// `may_alias` exempts accesses through these types from strict-aliasing
// assumptions.
struct tagUnaligned_64 {
uint64_t l;
} __attribute__ ((packed)) __attribute__ ((may_alias));
struct tagUnaligned_32 {
uint32_t l;
} __attribute__ ((packed)) __attribute__ ((may_alias));
struct tagUnaligned_16 {
uint16_t l;
} __attribute__ ((packed)) __attribute__ ((may_alias));
// LDnn(a): read an nn-bit unsigned value from address `a` through the packed wrapper.
// The result is an lvalue, so the same expression is reused by the STnn macros.
#define LD16(a) (((struct tagUnaligned_16 *) (a))->l)
#define LD32(a) (((struct tagUnaligned_32 *) (a))->l)
#define LD64(a) (((struct tagUnaligned_64 *) (a))->l)
// STRUCTA(size, align) declares struct tagUnaligned_<size>_<align>: a
// may_alias wrapper around uint<size>_t carrying an aligned(<align>) attribute.
// NOTE(review): these wrappers are not `packed`. GCC documents that `aligned`
// alone can only raise alignment, so e.g. the 32_2 / 64_2 / 64_4 variants may
// still assume the member's natural alignment -- confirm this is intended.
#define STRUCTA(size, align) struct tagUnaligned_##size##_##align {\
uint##size##_t l; \
} __attribute__ ((aligned(align))) __attribute__ ((may_alias))
STRUCTA (16, 2);
STRUCTA (32, 2);
STRUCTA (32, 4);
STRUCTA (64, 2);
STRUCTA (64, 4);
STRUCTA (64, 8);
//#define _USE_STRUCT_INT_CVT
//#ifdef _USE_STRUCT_INT_CVT
// STnn(a, b): store b as an nn-bit value at address `a` through the packed wrapper.
// These expand to a bare assignment expression (not wrapped in parentheses).
#define ST16(a, b) (((struct tagUnaligned_16 *) (a))->l) = (b)
#define ST32(a, b) (((struct tagUnaligned_32 *) (a))->l) = (b)
#define ST64(a, b) (((struct tagUnaligned_64 *) (a))->l) = (b)
// LDA / STA: generic load/store through the STRUCTA wrapper for (size, align).
#define LDA(a, size, align) (((struct tagUnaligned_##size##_##align *) (a))->l)
#define STA(a, b, size, align) (((struct tagUnaligned_##size##_##align *) (a))->l) = (b)
// Named shorthands: LD<size>A<align> / ST<size>A<align>.
#define LD16A2(a) LDA(a, 16, 2)
#define LD32A2(a) LDA(a, 32, 2)
#define LD32A4(a) LDA(a, 32, 4)
#define LD64A2(a) LDA(a, 64, 2)
#define LD64A4(a) LDA(a, 64, 4)
#define LD64A8(a) LDA(a, 64, 8)
#define ST16A2(a, b) STA(a, b, 16, 2)
#define ST32A2(a, b) STA(a, b, 32, 2)
#define ST32A4(a, b) STA(a, b, 32, 4)
#define ST64A2(a, b) STA(a, b, 64, 2)
#define ST64A4(a, b) STA(a, b, 64, 4)
#define ST64A8(a, b) STA(a, b, 64, 8)
//#else
//inline void __ST16(void *dst, uint16_t v) { memcpy(dst, &v, 2); }
//inline void __ST32(void *dst, uint32_t v) { memcpy(dst, &v, 4); }
//inline void __ST64(void *dst, uint64_t v) { memcpy(dst, &v, 8); }
//#endif
#else
// Non-GNU fallback: plain pointer casts and dereferences, with no packed or
// may_alias wrapper. The alignment-qualified names simply alias the
// unqualified macros, so no alignment information is conveyed to the compiler.
//#define INTD16(a) (*((int16_t*)(a)))
//#define INTD32(a) (*((int32_t*)(a)))
//#define INTD64(a) (*((int64_t*)(a)))
#define LD16(a) (*((uint16_t*)(a)))
#define LD32(a) (*((uint32_t*)(a)))
#define LD64(a) (*((uint64_t*)(a)))
#define ST16(a, b) *((uint16_t*)(a)) = (b)
#define ST32(a, b) *((uint32_t*)(a)) = (b)
#define ST64(a, b) *((uint64_t*)(a)) = (b)
#define LD16A2 LD16
#define LD32A2 LD32
#define LD32A4 LD32
#define LD64A2 LD64
#define LD64A4 LD64
#define LD64A8 LD64
#define ST16A2 ST16
#define ST32A2 ST32
#define ST32A4 ST32
#define ST64A2 ST64
#define ST64A4 ST64
#define ST64A8 ST64
#endif /* !__GNUC__ */
// INTDnn defaults to LDnn unless already defined. Because LDnn yields an
// unsigned lvalue, INTDnn does too despite the "INT" in the name.
#ifndef INTD16
#define INTD16 LD16
#endif//INTD16
#ifndef INTD32
#define INTD32 LD32
#endif//INTD32
#ifndef INTD64
#define INTD64 LD64
#endif//INTD64
#endif//___LD_ST_MACROS___

View File

@ -0,0 +1,329 @@
/*!
* \copy
* Copyright (c) 2009-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file macros.h
*
* \brief MACRO based tool utilization
*
* \date 3/13/2009 Created
*
*************************************************************************************
*/
#ifndef WELS_MACRO_UTILIZATIONS_H__
#define WELS_MACRO_UTILIZATIONS_H__
#include <math.h>
#include <assert.h>
#include <string.h>
#include "typedefs.h"
/*
 * ENFORCE_STACK_ALIGN_1D: force 1 dimension local data aligned in stack
 * _tp: type
 * _nm: var name
 * _sz: size
 * _al: align bytes
 * auxiliary var: _nm ## _tEmP
 *
 * Declares an over-sized backing array (_sz + _al - 1 elements) and a pointer
 * _nm into it whose address is rounded down to a multiple of _al.
 * Expands to two declarations, so it can only be used where declarations are
 * allowed and not as the body of an unbraced if/else.
 */
#define ENFORCE_STACK_ALIGN_1D(_tp, _nm, _sz, _al) \
_tp _nm ## _tEmP[(_sz)+(_al)-1]; \
_tp *_nm = _nm ## _tEmP + ((_al)-1) - (((uintptr_t)(_nm ## _tEmP + ((_al)-1)) & ((_al)-1))/sizeof(_tp));
/*
 * ENFORCE_STACK_ALIGN_2D: two-dimensional counterpart.
 * _tp: type, _nm: var name, _cx/_cy: dimensions, _al: align bytes
 * auxiliary vars: _nm ## _tEmP and _nm ## _tEmP_al
 *
 * Asserts that _al is a power of two and at least sizeof(_tp), declares a
 * backing array of _cx*_cy elements plus slack, and exposes _nm as a pointer
 * to rows of _cy elements starting at an _al-aligned address.
 * Expands to an assert plus several declarations/statements.
 */
#define ENFORCE_STACK_ALIGN_2D(_tp, _nm, _cx, _cy, _al) \
assert( ((_al) && !((_al) & ((_al) - 1))) && ((_al) >= sizeof(_tp)) ); /*_al should be power-of-2 and >= sizeof(_tp)*/\
_tp _nm ## _tEmP[(_cx)*(_cy)+(_al)/sizeof(_tp)-1]; \
_tp *_nm ## _tEmP_al = _nm ## _tEmP + ((_al)/sizeof(_tp)-1); \
_nm ## _tEmP_al -= (((uintptr_t)_nm ## _tEmP_al & ((_al)-1))/sizeof(_tp)); \
_tp (*_nm)[(_cy)] = (_tp (*)[(_cy)])_nm ## _tEmP_al;
// Compiler portability shims.
// - MSVC older than _MSC_VER 1700: map `inline` to `__inline`.
// - ALIGNED_DECLARE(type, var, n): declare `var` with n-byte alignment using
//   the compiler's own syntax. It is left undefined for compilers that are
//   neither MSVC nor GNU-compatible.
#if defined(_MSC_VER)
#if(_MSC_VER < 1700)
#define inline __inline
#endif
#define ALIGNED_DECLARE( type, var, n ) __declspec(align(n)) type var
#elif defined(__GNUC__)
#define ALIGNED_DECLARE( type, var, n ) type var __attribute__((aligned(n)))
#endif//_MSC_VER
// WELS_ALIGN(x, n): round x up to the next multiple of n. The mask form only
// gives that result when n is a power of two.
#ifndef WELS_ALIGN
#define WELS_ALIGN(x, n) (((x)+(n)-1)&~((n)-1))
#endif//WELS_ALIGN
#if 1 // Alternative implementation of WELS_MAX and WELS_MIN
// Comparison-based forms. Each argument may be evaluated more than once, so
// arguments must be free of side effects.
#ifndef WELS_MAX
#define WELS_MAX(x, y) ((x) > (y) ? (x) : (y))
#endif//WELS_MAX
#ifndef WELS_MIN
#define WELS_MIN(x, y) ((x) < (y) ? (x) : (y))
#endif//WELS_MIN
#ifndef WELS_MIN_POSITIVE
// Smaller of x and y when both are >= 0; otherwise the larger of the two
// (i.e. the non-negative one when exactly one of them is negative).
// Arguments and the whole expression are parenthesised so that operator
// arguments such as `a & 1` are compared as intended.
// NOTE(review): the trailing ';' is kept only so that existing call sites that
// rely on it keep compiling; it still prevents use inside a larger expression.
// Drop it once all callers have been checked.
#define WELS_MIN_POSITIVE(x, y) ((((x) >= 0) && ((y) >= 0)) ? WELS_MIN(x, y) : WELS_MAX(x, y));
#endif//WELS_MIN_POSITIVE
#else // Alternative implementation of WELS_MAX and WELS_MIN
// Branch-free forms (currently disabled). They use the sign of (x)-(y)
// shifted right by 31, i.e. they are written for 32-bit signed operands, and
// this branch does not define WELS_MIN_POSITIVE.
#ifndef WELS_MAX
#define WELS_MAX(x, y) ((x) - (((x)-(y))&(((x)-(y))>>31)))
#endif//WELS_MAX
#ifndef WELS_MIN
#define WELS_MIN(x, y) ((y) + (((x)-(y))&(((x)-(y))>>31)))
#endif//WELS_MIN
#endif // Alternative implementation of WELS_MAX and WELS_MIN
// Thin aliases for the <math.h> ceil/floor functions.
#ifndef WELS_CEIL
#define WELS_CEIL(x) ceil(x) // FIXME: low complexity instead of math library used
#endif//WELS_CEIL
#ifndef WELS_FLOOR
#define WELS_FLOOR(x) floor(x) // FIXME: low complexity instead of math library used
#endif//WELS_FLOOR
// Add 0.5 and convert to integer. The conversion truncates toward zero, so
// this rounds to nearest only for non-negative x.
#ifndef WELS_ROUND
#define WELS_ROUND(x) ((int32_t)(0.5+(x)))
#endif//WELS_ROUND
#ifndef WELS_ROUND64
#define WELS_ROUND64(x) ((int64_t)(0.5+(x)))
#endif//WELS_ROUND64
// Division of x by y with half of y added first (rounds to nearest for
// non-negative operands). A zero divisor is special-cased: the result is
// x / (y + 1), i.e. x itself, instead of a division by zero.
// Both arguments are evaluated more than once.
#ifndef WELS_DIV_ROUND
#define WELS_DIV_ROUND(x,y) ((int32_t)((y)==0?((x)/((y)+1)):(((y)/2+(x))/(y))))
#endif//WELS_DIV_ROUND
#ifndef WELS_DIV_ROUND64
#define WELS_DIV_ROUND64(x,y) ((int64_t)((y)==0?((x)/((y)+1)):(((y)/2+(x))/(y))))
#endif//WELS_DIV_ROUND64
// Combine two neighbour counts nA and nB into nC, where -1 is treated as a
// special marker (presumably "neighbour unavailable" -- confirm with callers):
//   neither is -1 : nC = (nA + nB + 1) >> 1   (rounded average)
//   exactly one   : nC = the other value      (nA + nB + 1 with one term -1)
//   both are -1   : nC = 0                    (-1 + 1)
// Expands to a braced block; arguments are not parenthesised and are
// evaluated several times, so pass plain variables.
#define WELS_NON_ZERO_COUNT_AVERAGE(nC,nA,nB) { \
nC = nA + nB + 1; \
nC >>= (uint8_t)( nA != -1 && nB != -1); \
nC += (uint8_t)(nA == -1 && nB == -1); \
}
/*
 * Ceiling of log2(i): the smallest s such that (1 << s) >= i.
 * Returns 0 for every i <= 1, including zero and negative inputs.
 */
static inline int32_t CeilLog2 (int32_t i) {
  int32_t iBits = 0;
  // Count the significant bits of (i - 1).
  for (int32_t iRest = i - 1; iRest > 0; iRest >>= 1) {
    ++iBits;
  }
  return iBits;
}
/*
the second path will degrade the performance
*/
#if 1
/*
 * Median of three integers: the sum of all three minus the smallest and the
 * largest leaves the middle value.
 */
static inline int32_t WelsMedian (int32_t iX, int32_t iY, int32_t iZ) {
  // Order the first two values ...
  const bool kbYBelowX = iY < iX;
  int32_t iLow  = kbYBelowX ? iY : iX;
  int32_t iHigh = kbYBelowX ? iX : iY;

  // ... then widen the [iLow, iHigh] range with the third if it lies outside.
  if (iZ < iLow) {
    iLow = iZ;
  } else if (iZ > iHigh) {
    iHigh = iZ;
  }
  return (iX + iY + iZ) - (iLow + iHigh);
}
#else
// Branch-free variant (disabled by the `#if 1` above).
// The pattern d & (d >> 31) yields d when d is negative and 0 otherwise; it
// relies on >> of a negative int32_t replicating the sign bit.
static inline int32_t WelsMedian (int32_t iX, int32_t iY, int32_t iZ) {
int32_t iTmp = (iX - iY) & ((iX - iY) >> 31); // min(iX - iY, 0)
iX -= iTmp; // iX = max(x, y)
iY += iTmp; // iY = min(x, y)
iY -= (iY - iZ) & ((iY - iZ) >> 31); // iY = max(iY, iZ)
iY += (iX - iY) & ((iX - iY) >> 31); // iY = min(iX, iY), which is the median
return iY;
}
#endif
// Negation written as "invert all bits, then add one".
#ifndef NEG_NUM
//#define NEG_NUM( num ) (-num)
#define NEG_NUM(iX) (1+(~(iX)))
#endif// NEG_NUM
/*
 * Clamp iX to the 8-bit range [0, 255].
 *
 * Written with explicit comparisons: the previous bit trick negated iX, which
 * overflows for INT32_MIN (and made that input clip to 255 instead of 0 on
 * common targets), and relied on >> of a negative value being an arithmetic
 * shift.
 */
static inline uint8_t WelsClip1 (int32_t iX) {
  if (iX < 0) {
    return 0;
  }
  if (iX > 255) {
    return 255;
  }
  return static_cast<uint8_t> (iX);
}
// WELS_SIGN(iX): iX converted to int32_t and shifted right by 31 -- yields 0
// for non-negative values and, with a sign-replicating shift, -1 for negatives.
#ifndef WELS_SIGN
#define WELS_SIGN(iX) ((int32_t)(iX) >> 31)
#endif //WELS_SIGN
// WELS_ABS(iX): absolute value. The active form uses a comparison and
// evaluates iX twice; the disabled form is the branch-free equivalent built
// on WELS_SIGN.
#ifndef WELS_ABS
#if 1
#define WELS_ABS(iX) ((iX)>0 ? (iX) : -(iX))
#else
#define WELS_ABS(iX) ((WELS_SIGN(iX) ^ (int32_t)(iX)) - WELS_SIGN(iX))
#endif
#endif //WELS_ABS
// WELS_CLIP3
// WELS_CLIP3(iX, iY, iZ): clamp iX to the range [iY, iZ]. The lower bound is
// tested first, so iY is returned whenever iX < iY, even if iY > iZ.
// Arguments are evaluated more than once.
#ifndef WELS_CLIP3
#define WELS_CLIP3(iX, iY, iZ) ((iX) < (iY) ? (iY) : ((iX) > (iZ) ? (iZ) : (iX)))
#endif //WELS_CLIP3
/*
 * Type-safe counterpart of WELS_CLIP3: clamp iX to [iY, iZ].
 * The lower bound is checked first, so iY wins if the bounds are inverted
 * and iX lies below iY. Requires operator< and operator> on T.
 */
template<typename T> T WelsClip3(T iX, T iY, T iZ) {
  return (iX < iY) ? iY
         : (iX > iZ) ? iZ
         : iX;
}
// Place inside a class body to declare (without defining) a private copy
// constructor and copy-assignment operator for `cclass`.
// The macro switches the access level to `private:`; members declared after
// it stay private unless another access specifier follows.
#define DISALLOW_COPY_AND_ASSIGN(cclass) \
private: \
cclass(const cclass &); \
cclass& operator=(const cclass &);
/*
 * NOTE for all three macros below: each expands to a bare `if (...) { ... }`
 * statement. They are therefore unsafe directly in front of an `else`, and
 * their arguments are pasted unparenthesised, so pass simple expressions.
 */
/*
 * Description: return iResult from the enclosing function when it differs
 *              from the expected value
 * iResult: value to be checked (and returned on mismatch); evaluated twice
 * iExpected: the expected value
 */
#ifndef WELS_VERIFY_RETURN_IFNEQ
#define WELS_VERIFY_RETURN_IFNEQ(iResult, iExpected) \
if (iResult != iExpected) { \
return iResult; \
}
#endif//#if WELS_VERIFY_RETURN_IFNEQ
/*
 * Description: return the specified result from the enclosing function when
 *              the given condition holds
 * iResult: value to be returned
 * bCaseIf: failure condition to be verified (return happens when it is true)
 */
#ifndef WELS_VERIFY_RETURN_IF
#define WELS_VERIFY_RETURN_IF(iResult, bCaseIf) \
if (bCaseIf) { \
return iResult; \
}
#endif//#if WELS_VERIFY_RETURN_IF
/*
 * Description: same as WELS_VERIFY_RETURN_IF, but runs a corresponding
 *              clean-up statement before returning
 * iResult: value to be returned
 * bCaseIf: failure condition to be verified (return happens when it is true)
 * fProc: statement/expression executed just before the return
 */
#ifndef WELS_VERIFY_RETURN_PROC_IF
#define WELS_VERIFY_RETURN_PROC_IF(iResult, bCaseIf, fProc) \
if (bCaseIf) { \
fProc; \
return iResult; \
}
#endif//#if WELS_VERIFY_RETURN_PROC_IF
/*
 * Floor of log2(v): the index of the highest set bit.
 * Returns 0 for both v == 0 and v == 1.
 */
static inline int32_t WELS_LOG2 (uint32_t v) {
  int32_t iLog = 0;
  // Each shift that leaves a non-zero remainder accounts for one more bit.
  for (v >>= 1; v != 0; v >>= 1) {
    ++iLog;
  }
  return iLog;
}
// Clamp q to the range [0, 51] via WELS_CLIP3.
#define CLIP3_QP_0_51(q) WELS_CLIP3(q, 0, 51) // ((q) < (0) ? (0) : ((q) > (51) ? (51) : (q)))
// Bytes per row for `width` samples of `bitcount` bits each, with the row
// size rounded up to a multiple of 32 bits (4 bytes).
// Arguments are parenthesised so that expressions such as `w + pad` are
// multiplied as a whole.
#define CALC_BI_STRIDE(width,bitcount) (((((width) * (bitcount)) + 31) & ~31) >> 3)
// Replicate a value into both halves of a word twice as wide:
//   BUTTERFLY1x2: b  -> (b  << 8)  | b    (a byte repeated in 16 bits)
//   BUTTERFLY2x4: wd -> (wd << 16) | wd   (16 bits repeated in 32 bits)
//   BUTTERFLY4x8: dw -> (dw << 32) | dw   (32 bits repeated in 64 bits)
// The argument is not masked, so it must already fit the narrower width, and
// it is evaluated twice. BUTTERFLY1x2 shifts in the argument's own (promoted)
// type; the other two widen the shifted copy first.
#ifndef BUTTERFLY1x2
#define BUTTERFLY1x2(b) (((b)<<8) | (b))
#endif//BUTTERFLY1x2
#ifndef BUTTERFLY2x4
#define BUTTERFLY2x4(wd) (((uint32_t)(wd)<<16) |(wd))
#endif//BUTTERFLY2x4
#ifndef BUTTERFLY4x8
#define BUTTERFLY4x8(dw) (((uint64_t)(dw)<<32) | (dw))
#endif//BUTTERFLY4x8
/*
 * True when v is a power of two, i.e. non-zero with exactly one bit set.
 * Clearing the lowest set bit (v & (v - 1)) must leave nothing behind.
 */
static inline bool WELS_POWER2_IF (uint32_t v) {
  if (v == 0) {
    return false;
  }
  return (v & (v - 1)) == 0;
}
// WELS_GCC_UNUSED: expands to the GNU `unused` attribute on GCC newer than
// 2.4 (and compilers reporting such a __GNUC__ version), and to nothing elsewhere.
#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ > 4)
#define WELS_GCC_UNUSED __attribute__((__unused__))
#else
#define WELS_GCC_UNUSED
#endif
/*
 * True when kiCurrent lies in the half-open interval [kiMin, kiMax):
 * the lower bound is included, the upper bound is not.
 */
inline bool CheckInRangeCloseOpen (const int16_t kiCurrent, const int16_t kiMin, const int16_t kiMax) {
  if (kiCurrent < kiMin) {
    return false;
  }
  return kiCurrent < kiMax;
}
/*
 * Fill iSizeOfData consecutive 32-bit elements starting at pDst with iValue.
 * iSizeOfData is an element count, not a byte count; nothing is written when
 * it is zero or negative.
 */
static inline void WelsSetMemUint32_c (uint32_t* pDst, uint32_t iValue, int32_t iSizeOfData) {
  uint32_t* pOut = pDst;
  for (int32_t iRemaining = iSizeOfData; iRemaining > 0; --iRemaining) {
    *pOut++ = iValue;
  }
}
/*
 * Fill iSizeOfData consecutive 16-bit elements starting at pDst with iValue.
 * iSizeOfData is an element count, not a byte count; nothing is written when
 * it is zero or negative.
 */
static inline void WelsSetMemUint16_c (uint16_t* pDst, uint16_t iValue, int32_t iSizeOfData) {
  uint16_t* pOut = pDst;
  for (int32_t iRemaining = iSizeOfData; iRemaining > 0; --iRemaining) {
    *pOut++ = iValue;
  }
}
/*
 * Fill iSizeOfData elements at pDst with iValue, where each element is
 * iDataLengthOfData bytes wide (1, 2 or 4; checked by assert only).
 *
 * A zero value is handled with one memset over the whole byte range. For
 * non-zero values the 4- and 2-byte widths use the typed fill helpers
 * (iValue is truncated to 16 bits for width 2); any other width falls
 * through to a byte-wise memset of iSizeOfData bytes with the low 8 bits of
 * iValue.
 */
inline void WelsSetMemMultiplebytes_c (void* pDst, uint32_t iValue, int32_t iSizeOfData, int32_t iDataLengthOfData) {
  assert (4 == iDataLengthOfData || 2 == iDataLengthOfData || 1 == iDataLengthOfData);
  // TODO: consider add assembly for these functions
  if (0 == iValue) {
    // All-zero pattern is width independent: clear the whole byte range at once.
    memset (static_cast<uint8_t*> (pDst), 0, iSizeOfData * iDataLengthOfData);
    return;
  }

  switch (iDataLengthOfData) {
  case 4:
    WelsSetMemUint32_c (static_cast<uint32_t*> (pDst), static_cast<uint32_t> (iValue), iSizeOfData);
    break;
  case 2:
    WelsSetMemUint16_c (static_cast<uint16_t*> (pDst), static_cast<uint16_t> (iValue), iSizeOfData);
    break;
  default:
    memset (static_cast<uint8_t*> (pDst), static_cast<uint8_t> (iValue), iSizeOfData);
    break;
  }
}
#endif//WELS_MACRO_UTILIZATIONS_H__

View File

@ -0,0 +1,363 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef MC_H
#define MC_H
#include "typedefs.h"
// Function-pointer type for a prediction routine that reads a block from pSrc (row stride iSrcStride)
// and writes an iWidth x iHeight block to pDst (row stride iDstStride), parameterized by the motion
// vector components iMvX / iMvY.
// NOTE(review): "Mc" presumably stands for motion compensation; the unit of iMvX/iMvY is not visible here.
typedef void (*PWelsMcFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight);
// Function-pointer type for the luma half-pel routines stored in SMcFunc: same source/destination
// arguments as PWelsMcFunc but without a motion vector.
typedef void (*PWelsLumaHalfpelMcFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iWidth, int32_t iHeight);
// Function-pointer type for a routine taking one writable buffer and two read-only buffers, each
// followed by an int32_t, plus two trailing int32_t values.
// NOTE(review): parameters are unnamed; presumably (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB,
// iSrcBStride, iWidth, iHeight) by analogy with the PixelAvg* prototypes -- confirm against the implementation.
typedef void (*PWelsSampleAveragingFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*, int32_t,
int32_t, int32_t);
// Table of function pointers for the prediction routines declared in this header.
// The only function in this header that takes an SMcFunc* is WelsCommon::InitMcFunc(), which
// presumably fills the table in -- confirm in the implementation.
typedef struct TagMcFunc {
// Luma half-pel routines; by their names: horizontal, vertical and centre (both directions).
PWelsLumaHalfpelMcFunc pfLumaHalfpelHor;
PWelsLumaHalfpelMcFunc pfLumaHalfpelVer;
PWelsLumaHalfpelMcFunc pfLumaHalfpelCen;
// Motion-vector driven routines for the chroma and luma planes.
PWelsMcFunc pMcChromaFunc;
PWelsMcFunc pMcLumaFunc;
// Routine combining two source blocks into one destination block (see PWelsSampleAveragingFunc).
PWelsSampleAveragingFunc pfSampleAveraging;
} SMcFunc;
namespace WelsCommon {
// Takes the function table pMcFunc and a CPU value iCpu.
// NOTE(review): presumably iCpu is a CPU-feature bit mask used to pick C or SIMD implementations
// for the table entries -- the definition is not visible in this header, confirm there.
void InitMcFunc (SMcFunc* pMcFunc, uint32_t iCpu);
} // namespace WelsCommon
#if defined(__cplusplus)
extern "C" {
#endif//__cplusplus
// Prototypes of the NEON routines, visible only when HAVE_NEON is defined
// (presumably the 32-bit ARM build; the AArch64 variants are guarded by HAVE_NEON_AARCH64).
// Naming used below: the "WidthEqN" suffix gives the block width handled by the routine, and
// "McHorVerXY" carries the quarter-sample position (X, Y) as documented on the 20/02/22 groups.
// NOTE(review): the PixelAvgWidthEq*_neon prototypes take no source strides, unlike the
// PixelAvgWidthEq*_AArch64_neon / _mmx / _sse2 prototypes in this header -- confirm against the assembly.
#if defined(HAVE_NEON)
void McCopyWidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
void McCopyWidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
void McCopyWidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
void McChromaWidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t* pWeights, int32_t iHeight);
void McChromaWidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t* pWeights, int32_t iHeight);
void PixelAvgWidthEq16_neon (uint8_t* pDst, int32_t iDstStride, uint8_t* pSrcA, uint8_t* pSrcB, int32_t iHeight);
void PixelAvgWidthEq8_neon (uint8_t* pDst, int32_t iDstStride, uint8_t* pSrcA, uint8_t* pSrcB, int32_t iHeight);
void PixelAvgWidthEq4_neon (uint8_t* pDst, int32_t iDstStride, uint8_t* pSrcA, uint8_t* pSrcB, int32_t iHeight);
void McHorVer01WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer01WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer01WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer03WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer03WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer03WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer10WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer10WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer10WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer30WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer30WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer30WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
//horizontal filter to gain half sample, that is (2, 0) location in quarter sample
void McHorVer20WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer20WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer20WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
//vertical filter to gain half sample, that is (0, 2) location in quarter sample
void McHorVer02WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer02WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer02WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
//horizontal and vertical filter to gain half sample, that is (2, 2) location in quarter sample
void McHorVer22WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer22WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer22WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
// Averaging of two sources that each carry their own stride.
void PixStrideAvgWidthEq16_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA,
const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight);
void PixStrideAvgWidthEq8_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA,
const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight);
// Variants for blocks one sample wider and/or taller than 16/8/4, as the trailing comments state.
void McHorVer20Width17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);// width+1
void McHorVer20Width9_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);// width+1
void McHorVer20Width5_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);// width+1
void McHorVer02Height17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);// height+1
void McHorVer02Height9_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);// height+1
void McHorVer02Height5_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);// height+1
void McHorVer22Width17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);//width+1&&height+1
void McHorVer22Width9_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);//width+1&&height+1
void McHorVer22Width5_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);//width+1&&height+1
#endif
// Prototypes of the AArch64 NEON routines, visible only when HAVE_NEON_AARCH64 is defined.
// The set mirrors the HAVE_NEON group name-for-name with an added "_AArch64" infix; the one
// signature difference is that PixelAvgWidthEq*_AArch64_neon take a stride for each source.
#if defined(HAVE_NEON_AARCH64)
void McCopyWidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McCopyWidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McCopyWidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McChromaWidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t* pWeights, int32_t iHeight);
void McChromaWidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t* pWeights, int32_t iHeight);
void PixelAvgWidthEq16_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight);
void PixelAvgWidthEq8_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight);
void PixelAvgWidthEq4_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight);
void McHorVer01WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer01WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer01WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer03WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer03WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer03WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer10WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer10WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer10WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer30WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer30WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer30WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
//horizontal filter to gain half sample, that is (2, 0) location in quarter sample
void McHorVer20WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer20WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer20WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
//vertical filter to gain half sample, that is (0, 2) location in quarter sample
void McHorVer02WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer02WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer02WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
//horizontal and vertical filter to gain half sample, that is (2, 2) location in quarter sample
void McHorVer22WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer22WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer22WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
// Averaging of two sources that each carry their own stride.
void PixStrideAvgWidthEq16_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA,
const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight);
void PixStrideAvgWidthEq8_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA,
const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight);
// Variants for blocks one sample wider and/or taller than 16/8/4, as the trailing comments state.
void McHorVer20Width17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);// width+1
void McHorVer20Width9_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);// width+1
void McHorVer20Width5_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);// width+1
void McHorVer02Height17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);// height+1
void McHorVer02Height9_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);// height+1
void McHorVer02Height5_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);// height+1
void McHorVer22Width17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);//width+1&&height+1
void McHorVer22Width9_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);//width+1&&height+1
void McHorVer22Width5_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);//width+1&&height+1
#endif
// Prototypes of the x86 assembly routines, visible only when X86_ASM is defined and grouped by
// the instruction-set suffix of the symbol (mmx, sse2, sse3, ssse3, avx2).
// The AVX2 group is additionally guarded by HAVE_AVX2.
// Several routines here take an explicit iWidth, and some read or write int16_t buffers
// (the "U8ToS16" / "S16ToU8" names); a few of those int16_t buffers have no stride parameter.
#if defined(X86_ASM)
//***************************************************************************//
// MMXEXT definition //
//***************************************************************************//
void McHorVer20WidthEq4_mmx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McChromaWidthEq4_mmx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
const uint8_t* kpABCD, int32_t iHeight);
void McCopyWidthEq8_mmx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void PixelAvgWidthEq4_mmx (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight);
void PixelAvgWidthEq8_mmx (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight);
//***************************************************************************//
// SSE2 definition //
//***************************************************************************//
void McChromaWidthEq8_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
const uint8_t* kpABCD, int32_t iHeight);
void McCopyWidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer20WidthEq8_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer20WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer02WidthEq8_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
// The (2, 2) position is split into a "HorFirst" routine and a "VerLast" routine.
// NOTE(review): presumably HorFirst writes an intermediate buffer that VerLast consumes as pTap -- confirm.
void McHorVer22Width8HorFirst_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
void McHorVer22Width8VerLastAlign_sse2 (const uint8_t* pTap, int32_t iTapStride, uint8_t* pDst, int32_t iDstStride,
int32_t iWidth, int32_t iHeight);
void McHorVer22Width8VerLastUnAlign_sse2 (const uint8_t* pTap, int32_t iTapStride, uint8_t* pDst, int32_t iDstStride,
int32_t iWidth, int32_t iHeight);
void PixelAvgWidthEq16_sse2 (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight);
void McHorVer20Width9Or17_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iWidth,
int32_t iHeight);
void McHorVer20Width5_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iWidth, int32_t iHeight);
void McHorVer02Height9Or17_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iWidth,
int32_t iHeight);
void McHorVer02Height5_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iWidth, int32_t iHeight);
void McHorVer22HorFirst_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pTap, int32_t iTapStride,
int32_t iWidth,
int32_t iHeight);
void McHorVer22Width5HorFirst_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pTap, int32_t iTapStride,
int32_t iWidth, int32_t iHeight);
void McHorVer22Width4VerLastAlign_sse2 (const uint8_t* pTap, int32_t iTapStride, uint8_t* pDst, int32_t iDstStride,
int32_t iWidth, int32_t iHeight);
void McHorVer22Width4VerLastUnAlign_sse2 (const uint8_t* pTap, int32_t iTapStride, uint8_t* pDst, int32_t iDstStride,
int32_t iWidth, int32_t iHeight);
//***************************************************************************//
// SSE3 definition //
//***************************************************************************//
void McCopyWidthEq16_sse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
//***************************************************************************//
// SSSE3 definition //
//***************************************************************************//
void McChromaWidthEq8_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
const uint8_t* kpABCD, int32_t iHeight);
void McHorVer02_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iWidth, int32_t iHeight);
void McHorVer02Width4S16ToU8_ssse3 (const int16_t* pSrc, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
void McHorVer02Width5S16ToU8_ssse3 (const int16_t* pSrc, int32_t iSrcStride,
uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
void McHorVer02WidthGe8S16ToU8_ssse3 (const int16_t* pSrc, int32_t iSrcStride,
uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight);
void McHorVer20_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iWidth, int32_t iHeight);
void McHorVer20Width4U8ToS16_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, int16_t* pDst, int32_t iHeight);
void McHorVer20Width5Or9Or17_ssse3 (const uint8_t* pSrc, int32_t iSrcStride,
uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight);
void McHorVer20Width8U8ToS16_ssse3 (const uint8_t* pSrc, int32_t iSrcStride,
int16_t* pDst, int32_t iDstStride, int32_t iHeight);
void McHorVer20Width9Or17U8ToS16_ssse3 (const uint8_t* pSrc, int32_t iSrcStride,
int16_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight);
//***************************************************************************//
// AVX2 definition //
//***************************************************************************//
#ifdef HAVE_AVX2
void McHorVer02_avx2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iWidth, int32_t iHeight);
void McHorVer02Width4S16ToU8_avx2 (const int16_t* pSrc, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
void McHorVer02Width5S16ToU8_avx2 (const int16_t* pSrc, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
void McHorVer02Width8S16ToU8_avx2 (const int16_t* pSrc, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
void McHorVer02Width9S16ToU8_avx2 (const int16_t* pSrc, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
void McHorVer02Width16Or17S16ToU8_avx2 (const int16_t* pSrc, int32_t iSrcStride,
uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight);
void McHorVer20_avx2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iWidth, int32_t iHeight);
void McHorVer20Width5Or9Or17_avx2 (const uint8_t* pSrc, int32_t iSrcStride,
uint8_t* pDst, int32_t iDstStride, int32_t iWidth, int32_t iHeight);
void McHorVer20Width4U8ToS16_avx2 (const uint8_t* pSrc, int32_t iSrcStride, int16_t* pDst, int32_t iHeight);
void McHorVer20Width8U8ToS16_avx2 (const uint8_t* pSrc, int32_t iSrcStride, int16_t* pDst, int32_t iHeight);
void McHorVer20Width16U8ToS16_avx2 (const uint8_t* pSrc, int32_t iSrcStride, int16_t* pDst, int32_t iHeight);
void McHorVer20Width17U8ToS16_avx2 (const uint8_t* pSrc, int32_t iSrcStride, int16_t* pDst, int32_t iHeight);
#endif //HAVE_AVX2
#endif //X86_ASM
#if defined(__cplusplus)
}
#endif//__cplusplus
#endif//MC_H

View File

@ -0,0 +1,88 @@
/*!
* \copy
* Copyright (c) 2009-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file measure_time.h
*
* \brief time cost measure utilization
*
* \date 04/28/2009 Created
*
*************************************************************************************
*/
#ifndef WELS_TIME_COST_MEASURE_UTIL_H__
#define WELS_TIME_COST_MEASURE_UTIL_H__
#include <stdlib.h>
#include "typedefs.h"
#ifndef _WIN32
#include <sys/time.h>
#else
#include <windows.h>
#endif
#include <time.h>
#ifdef __cplusplus
extern "C" {
#endif//__cplusplus
/*!
 * \brief   read the current time stamp in microseconds, for time cost measurement
 * \param   void
 * \return  time stamp in microseconds.
 *          Non-Windows: taken from gettimeofday(), i.e. seconds * 1000000 + microseconds.
 *          Windows: QueryPerformanceCounter() ticks scaled by the counter frequency and
 *          rounded to the nearest microsecond.
 * \note    only the difference between two calls is meaningful as an elapsed time.
 */
static inline int64_t WelsTime (void) {
#ifdef _WIN32
  // The counter frequency is queried on the first call only and cached afterwards.
  static int64_t iTicksPerSecond = 0;
  if (0 == iTicksPerSecond) {
    QueryPerformanceFrequency ((LARGE_INTEGER*)&iTicksPerSecond);
    if (0 == iTicksPerSecond)
      iTicksPerSecond = 1; // keeps the division below well defined
  }
  int64_t iTicksNow = 0;
  QueryPerformanceCounter ((LARGE_INTEGER*)&iTicksNow);
  // ticks -> microseconds; +0.5 rounds to nearest before the truncating cast
  return (int64_t) ((double)iTicksNow * 1e6 / (double)iTicksPerSecond + 0.5);
#else
  struct timeval sNow;
  gettimeofday (&sNow, NULL);
  // widen both fields to 64 bits before combining them
  const int64_t kiSeconds = (int64_t) sNow.tv_sec;
  const int64_t kiMicroSeconds = (int64_t) sNow.tv_usec;
  return kiSeconds * 1000000 + kiMicroSeconds;
#endif//_WIN32
}
#ifdef __cplusplus
}
#endif
#endif//WELS_TIME_COST_MEASURE_UTIL_H__

View File

@ -0,0 +1,116 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#if !defined(WELS_COMMON_MEMORY_ALIGN_H__)
#define WELS_COMMON_MEMORY_ALIGN_H__
#include "typedefs.h"
// NOTE: please do not remove the lines below, even the commented-out one; enable them to verify potential memory leaks, monitor memory usage, etc.
//#define MEMORY_CHECK
#define MEMORY_MONITOR
#ifdef MEMORY_CHECK
#ifndef MEMORY_MONITOR
#define MEMORY_MONITOR
#endif//MEMORY_MONITOR
#endif//MEMORY_CHECK
#ifdef MEMORY_CHECK
#include <stdio.h>
#endif//MEMORY_CHECK
namespace WelsCommon {
// Allocation helper constructed with a cache line size. It offers tagged allocate / free
// operations plus accessors for the cache line size and the memory usage.
// Only the declaration is visible here; the notes below that go beyond the signatures are hedged.
class CMemoryAlign {
public:
// kuiCacheLineSize: cache line size; presumably the alignment applied to returned blocks -- confirm in the implementation.
CMemoryAlign (const uint32_t kuiCacheLineSize);
virtual ~CMemoryAlign();
// Allocate kuiSize bytes; by its name (and the file-level WelsMallocz comment) the "z" variant zero-fills the block.
// kpTag: caller-supplied string; presumably used to label the allocation for diagnostics -- confirm.
void* WelsMallocz (const uint32_t kuiSize, const char* kpTag);
void* WelsMalloc (const uint32_t kuiSize, const char* kpTag);
// Release a block; presumably pPointer must come from WelsMalloc / WelsMallocz of this class -- confirm.
void WelsFree (void* pPointer, const char* kpTag);
const uint32_t WelsGetCacheLineSize() const;
const uint32_t WelsGetMemoryUsage() const;
private:
// Copy constructor and copy assignment are declared private so that instances cannot be copied
// from outside the class (added to satisfy a Klocwork scan).
CMemoryAlign (const CMemoryAlign& kcMa);
CMemoryAlign& operator= (const CMemoryAlign& kcMa);
protected:
uint32_t m_nCacheLineSize;
// Present only when MEMORY_MONITOR is defined.
#ifdef MEMORY_MONITOR
uint32_t m_nMemoryUsageInBytes;
#endif//MEMORY_MONITOR
};
/*!
 *************************************************************************************
 * \brief   malloc with zero filled utilization in Wels
 *
 * \param   kuiSize size of memory block required
 * \param   kpTag   caller-supplied tag string
 *                  (presumably labels the allocation for diagnostics -- confirm in the implementation)
 *
 * \return  allocated memory pointer exactly, failed in case of NULL return
 *
 * \note    N/A
 *************************************************************************************
 */
void* WelsMallocz (const uint32_t kuiSize, const char* kpTag);
/*!
 *************************************************************************************
 * \brief   free utilization in Wels
 *
 * \param   pPtr    pointer to the data to be freed; it is passed by value, so the caller's
 *                  own pointer variable is not modified (WELS_SAFE_FREE resets it to NULL)
 * \param   kpTag   caller-supplied tag string
 *                  (presumably labels the release for diagnostics -- confirm in the implementation)
 *
 * \return  NONE
 *
 * \note    N/A
 *************************************************************************************
 */
void WelsFree (void* pPtr, const char* kpTag);
// Frees pPtr through WelsFree() and resets it to NULL, but only when pPtr is non-null.
// NOTE(review): the expansion is a bare `if`, so an `else` written directly after an
// invocation would bind to it; use it as a stand-alone statement.
#define WELS_SAFE_FREE(pPtr, pTag) if (pPtr) { WelsFree(pPtr, pTag); pPtr = NULL; }
// Creates `object` with operator new and casts the resulting pointer to type*.
// The expansion already ends with ';' (left as is so existing call sites keep compiling).
#define WELS_NEW_OP(object, type) \
(type*)(new object);
// Deletes p when it is non-null, then resets p to NULL.
// Both statements are enclosed in braces so the expansion is ONE compound statement:
// without them, `if (cond) WELS_DELETE_OP(p);` would guard only the delete and would
// execute `p = NULL` unconditionally, losing the pointer whenever cond is false.
#define WELS_DELETE_OP(p) \
{ if(p) delete p; \
p = NULL; }
}
#endif//WELS_COMMON_MEMORY_ALIGN_H__

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,124 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef WELS_SAD_COMMON_H_
#define WELS_SAD_COMMON_H_
#include "typedefs.h"
//===================SAD=====================//
// Prototypes with the "_c" suffix, declared outside the extern "C" block that follows (C++ linkage).
// NOTE(review): presumably these are the plain C/C++ reference implementations of the SIMD
// routines declared further down -- confirm in the corresponding source file.
// The single-result functions leave their parameters unnamed; by analogy with the "Four" variants
// they are presumably (sample1, stride1, sample2, stride2). The WxH in each name is presumably the block size.
int32_t WelsSampleSad16x16_c (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSad16x8_c (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSad8x16_c (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSad8x8_c (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSad8x4_c( uint8_t *, int32_t, uint8_t *, int32_t );
int32_t WelsSampleSad4x8_c( uint8_t *, int32_t, uint8_t *, int32_t );
int32_t WelsSampleSad4x4_c (uint8_t*, int32_t, uint8_t*, int32_t);
// "Four" variants return nothing and take an extra int32_t* pSad output argument.
// NOTE(review): presumably pSad receives four SAD values (one per neighbouring candidate position) -- confirm.
// iSample1 / iSample2 are pointers despite the "i" prefix.
void WelsSampleSadFour16x16_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad);
void WelsSampleSadFour16x8_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad);
void WelsSampleSadFour8x16_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad);
void WelsSampleSadFour8x8_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad);
void WelsSampleSadFour4x4_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad);
void WelsSampleSadFour8x4_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad);
void WelsSampleSadFour4x8_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad);
// Platform-specific SAD prototypes, given C linkage when compiled as C++.
// Each group is visible only when its feature macro is defined; the signatures match the "_c"
// prototypes declared above this block (single-result: four arguments returning int32_t;
// "Four": an additional int32_t* output argument and no return value).
#if defined(__cplusplus)
extern "C" {
#endif//__cplusplus
// x86 assembly (MMX / SSE2).
#if defined (X86_ASM)
int32_t WelsSampleSad4x4_mmx (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSad16x16_sse2 (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSad16x8_sse2 (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSad8x16_sse2 (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSad8x8_sse21 (uint8_t*, int32_t, uint8_t*, int32_t);
void WelsSampleSadFour16x16_sse2 (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
void WelsSampleSadFour16x8_sse2 (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
void WelsSampleSadFour8x16_sse2 (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
void WelsSampleSadFour8x8_sse2 (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
void WelsSampleSadFour4x4_sse2 (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
#endif//X86_ASM
// NEON (HAVE_NEON).
#if defined (HAVE_NEON)
int32_t WelsSampleSad4x4_neon (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSad16x16_neon (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSad16x8_neon (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSad8x16_neon (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSad8x8_neon (uint8_t*, int32_t, uint8_t*, int32_t);
void WelsSampleSadFour16x16_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
void WelsSampleSadFour16x8_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
void WelsSampleSadFour8x16_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
void WelsSampleSadFour8x8_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
void WelsSampleSadFour4x4_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
#endif
// AArch64 NEON (HAVE_NEON_AARCH64).
#if defined (HAVE_NEON_AARCH64)
int32_t WelsSampleSad4x4_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSad16x16_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSad16x8_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSad8x16_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSad8x8_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t);
void WelsSampleSadFour16x16_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
void WelsSampleSadFour16x8_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
void WelsSampleSadFour8x16_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
void WelsSampleSadFour8x8_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
void WelsSampleSadFour4x4_AArch64_neon (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
#endif
// MMI (HAVE_MMI); this group declares no WelsSampleSadFour4x4 variant.
#if defined (HAVE_MMI)
int32_t WelsSampleSad4x4_mmi (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSad16x16_mmi (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSad16x8_mmi (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSad8x16_mmi (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSad8x8_mmi (uint8_t*, int32_t, uint8_t*, int32_t);
void WelsSampleSadFour16x16_mmi (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
void WelsSampleSadFour16x8_mmi (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
void WelsSampleSadFour8x16_mmi (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
void WelsSampleSadFour8x8_mmi (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
#endif//HAVE_MMI
#if defined(__cplusplus)
}
#endif//__cplusplus
#endif //WELS_SAD_COMMON_H_

View File

@ -0,0 +1,86 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
// typedef.h
#ifndef WELS_TYPE_DEFINES_H__
#define WELS_TYPE_DEFINES_H__
#include <limits.h>
#include <stddef.h>
////////////////////////////////////////////////////////////////////////////
// NOTICE : ALL internal implement MUST use the data type defined as below
// ONLY except with the interface file !!!!!
////////////////////////////////////////////////////////////////////////////
// Fixed-width integer types.
// Non-MSVC toolchains take them from <stdint.h>/<inttypes.h>; the MSVC branch below
// declares them by hand. intX_t is a signed integer that is 64 bits wide when
// __LP64__ (or _WIN64 under MSVC) is defined and 32 bits wide otherwise.
#ifndef _MSC_VER
#define __STDC_FORMAT_MACROS
#include <stdint.h>
#include <inttypes.h>
#ifdef __LP64__
typedef int64_t intX_t;
#else
typedef int32_t intX_t;
#endif
#else
// FIXME: all signed types should be declared explicitly, for example, int8_t should be declared as signed char.
typedef signed char int8_t ;
typedef unsigned char uint8_t ;
typedef short int16_t ;
typedef unsigned short uint16_t;
typedef int int32_t ;
typedef unsigned int uint32_t;
typedef __int64 int64_t ;
typedef unsigned __int64 uint64_t;
// <inttypes.h> is not included on this branch, so PRId64 is defined here.
#define PRId64 "I64d"
#ifdef _WIN64
typedef int64_t intX_t;
#else
typedef int32_t intX_t;
#endif
#endif // _MSC_VER defined
// The 'float' type is portable and usable without any need for any extra typedefs.
#ifdef EPSN
#undef EPSN
#endif//EPSN
#define EPSN (0.000001f) // (1e-6) // desired float precision
#endif //WELS_TYPE_DEFINES_H__

View File

@ -0,0 +1,95 @@
/*!
* \copy
* Copyright (c) 2009-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \brief Tool kits for decoder
* ( malloc, realloc, free, log output and PSNR calculation and so on )
*
* \date 03/10/2009 Created
*
*************************************************************************************
*/
#ifndef WELS_UTILS_H__
#define WELS_UTILS_H__
#include <stdarg.h>
#include "typedefs.h"
#define MAX_LOG_SIZE 1024
#define MAX_MBS_PER_FRAME 36864 //in accordance with max level support in Rec
/*
* Function pointer declaration for various tool sets
*/
// wels log output
// Signature of a log sink: it receives a context pointer, the message level,
// a format string and the va_list holding the format arguments.
typedef void (*PWelsLogCallbackFunc) (void* pCtx, const int32_t iLevel, const char* kpFmt, va_list argv);
// Logging context; this is the first argument of WelsLog().
typedef struct TagLogContext {
PWelsLogCallbackFunc pfLog; // log sink callback
void* pLogCtx; // opaque pointer; presumably handed to pfLog as pCtx -- TODO confirm against WelsLog()
void* pCodecInstance; // opaque pointer to a codec instance; not interpreted in this header
} SLogContext;
#ifdef __GNUC__
extern void WelsLog (SLogContext* pCtx, int32_t iLevel, const char* kpFmt, ...) __attribute__ ((__format__ (__printf__,
3,
4)));
#else
extern void WelsLog (SLogContext* pCtx, int32_t iLevel, const char* kpFmt, ...);
#endif
/*
* PSNR calculation routines
*/
/*!
*************************************************************************************
* \brief PSNR calculation utilization in Wels
*
* \param kpTarPic target picture to be calculated in Picture pData format
* \param kiTarStride stride of target picture pData pBuffer
* \param kpRefPic base referencing picture samples
* \param kiRefStride stride of reference picture pData pBuffer
* \param kiWidth picture iWidth in pixel
* \param kiHeight picture iHeight in pixel
*
* \return actual PSNR result;
*
* \note N/A
*************************************************************************************
*/
float WelsCalcPsnr (const void* kpTarPic,
const int32_t kiTarStride,
const void* kpRefPic,
const int32_t kiRefStride,
const int32_t kiWidth,
const int32_t kiHeight);
#endif//WELS_UTILS_H__

View File

@ -0,0 +1,10 @@
#ifndef VERSION_H
#define VERSION_H
#ifdef GENERATED_VERSION_HEADER
#include "version_gen.h"
#else
#define VERSION_NUMBER "openh264 default: 1.4"
#endif
#endif // VERSION_H

View File

@ -0,0 +1,6 @@
#ifndef VERSION_GEN_H
#define VERSION_GEN_H
#define VERSION_NUMBER $FULL_VERSION
#endif // VERSION_GEN_H

View File

@ -0,0 +1,64 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef WELS_CODEC_TRACE
#define WELS_CODEC_TRACE
#include <stdarg.h>
#include "typedefs.h"
#include "utils.h"
#include "codec_app_def.h"
#include "codec_api.h"
// Trace helper: stores a trace level, a user-supplied WelsTraceCallback with its
// context pointer, and exposes an SLogContext (m_sLogCtx) as a public member.
// Only declarations are visible here; the member functions are defined elsewhere.
class welsCodecTrace {
public:
welsCodecTrace();
~welsCodecTrace();
// Setters for the codec instance pointer, trace level, callback and callback context.
void SetCodecInstance (void* pCodecInstance);
void SetTraceLevel (const int32_t kiLevel);
void SetTraceCallback (WelsTraceCallback func);
void SetTraceCallbackContext (void* pCtx);
private:
// Static entry point whose signature matches PWelsLogCallbackFunc;
// presumably forwards to CodecTrace() on the object passed as pCtx -- TODO confirm.
static void StaticCodecTrace (void* pCtx, const int32_t kiLevel, const char* kpStrFormat, va_list vl);
void CodecTrace (const int32_t kiLevel, const char* kpStrFormat, va_list vl);
int32_t m_iTraceLevel; // value set through SetTraceLevel() -- presumably
WelsTraceCallback m_fpTrace; // user trace callback
void* m_pTraceCtx; // context pointer for m_fpTrace
public:
SLogContext m_sLogCtx; // log context that callers can hand to WelsLog()
};
#endif //WELS_CODEC_TRACE

View File

@ -0,0 +1,373 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
//wels_common_defs.h
#ifndef WELS_COMMON_DEFS_H__
#define WELS_COMMON_DEFS_H__
#include "typedefs.h"
#include "macros.h"
#include "codec_app_def.h"
namespace WelsCommon {
/*common use table*/
#define CTX_NA 0
#define WELS_CONTEXT_COUNT 460
#define LEVEL_NUMBER 17
// Limit values for one level; g_ksLevelLimits holds LEVEL_NUMBER such entries.
typedef struct TagLevelLimits {
ELevelIdc uiLevelIdc; // level idc
uint32_t uiMaxMBPS; // Max macroblock processing rate (MB/s)
uint32_t uiMaxFS; // Max frame size (MBs)
uint32_t uiMaxDPBMbs;// Max decoded picture buffer size (MBs)
uint32_t uiMaxBR; // Max video bit rate
uint32_t uiMaxCPB; // Max CPB size
int16_t iMinVmv; // Vertical MV component range lower bound
int16_t iMaxVmv; // Vertical MV component range upper bound
uint16_t uiMinCR; // Min compression ratio
int16_t iMaxMvsPer2Mb; // Max number of motion vectors per two consecutive MBs
} SLevelLimits;
#define CpbBrNalFactor 1200 //baseline,main,and extended profiles.
extern const SLevelLimits g_ksLevelLimits[LEVEL_NUMBER];
extern const uint32_t g_kuiLevelMaps[LEVEL_NUMBER];
extern const uint8_t g_kuiMbCountScan4Idx[24];
extern const uint8_t g_kuiCache30ScanIdx[16];
extern const uint8_t g_kuiCache48CountScan4Idx[24];
extern const uint8_t g_kuiMatrixV[6][8][8];
extern const uint8_t g_kuiDequantScaling4x4Default[2][16];
extern const uint8_t g_kuiDequantScaling8x8Default[2][64];
extern const ALIGNED_DECLARE (uint16_t, g_kuiDequantCoeff[52][8], 16);
extern const ALIGNED_DECLARE (uint16_t, g_kuiDequantCoeff8x8[52][64], 16);
extern const uint8_t g_kuiChromaQpTable[52];
extern const uint8_t g_kuiCabacRangeLps[64][4];
extern const int8_t g_kiCabacGlobalContextIdx[WELS_CONTEXT_COUNT][4][2];
extern const uint8_t g_kuiStateTransTable[64][2];
extern const uint32_t g_kuiGolombUELength[256];
/*
* NAL Unit Type (5 Bits)
*/
enum EWelsNalUnitType {
NAL_UNIT_UNSPEC_0 = 0,
NAL_UNIT_CODED_SLICE = 1,
NAL_UNIT_CODED_SLICE_DPA = 2,
NAL_UNIT_CODED_SLICE_DPB = 3,
NAL_UNIT_CODED_SLICE_DPC = 4,
NAL_UNIT_CODED_SLICE_IDR = 5,
NAL_UNIT_SEI = 6,
NAL_UNIT_SPS = 7,
NAL_UNIT_PPS = 8,
NAL_UNIT_AU_DELIMITER = 9,
NAL_UNIT_END_OF_SEQ = 10,
NAL_UNIT_END_OF_STR = 11,
NAL_UNIT_FILLER_DATA = 12,
NAL_UNIT_SPS_EXT = 13,
NAL_UNIT_PREFIX = 14,
NAL_UNIT_SUBSET_SPS = 15,
NAL_UNIT_DEPTH_PARAM = 16, // NAL_UNIT_RESV_16
NAL_UNIT_RESV_17 = 17,
NAL_UNIT_RESV_18 = 18,
NAL_UNIT_AUX_CODED_SLICE = 19,
NAL_UNIT_CODED_SLICE_EXT = 20,
NAL_UNIT_MVC_SLICE_EXT = 21, // NAL_UNIT_RESV_21
NAL_UNIT_RESV_22 = 22,
NAL_UNIT_RESV_23 = 23,
NAL_UNIT_UNSPEC_24 = 24,
NAL_UNIT_UNSPEC_25 = 25,
NAL_UNIT_UNSPEC_26 = 26,
NAL_UNIT_UNSPEC_27 = 27,
NAL_UNIT_UNSPEC_28 = 28,
NAL_UNIT_UNSPEC_29 = 29,
NAL_UNIT_UNSPEC_30 = 30,
NAL_UNIT_UNSPEC_31 = 31
};
/*
* NAL Reference IDC (2 Bits)
*/
enum EWelsNalRefIdc {
NRI_PRI_LOWEST = 0,
NRI_PRI_LOW = 1,
NRI_PRI_HIGH = 2,
NRI_PRI_HIGHEST = 3
};
/*
* VCL TYPE
*/
enum EVclType {
NON_VCL = 0,
VCL = 1,
NOT_APP = 2
};
/*
* vcl type map for given NAL unit type and corresponding H264 type (0: AVC; 1: SVC).
*/
extern const EVclType g_keTypeMap[32][2];
#define IS_VCL_NAL(t, ext_idx) (g_keTypeMap[t][ext_idx] == VCL)
#define IS_PARAM_SETS_NALS(t) ( (t) == NAL_UNIT_SPS || (t) == NAL_UNIT_PPS || (t) == NAL_UNIT_SUBSET_SPS )
#define IS_SPS_NAL(t) ( (t) == NAL_UNIT_SPS )
#define IS_SUBSET_SPS_NAL(t) ( (t) == NAL_UNIT_SUBSET_SPS )
#define IS_PPS_NAL(t) ( (t) == NAL_UNIT_PPS )
#define IS_SEI_NAL(t) ( (t) == NAL_UNIT_SEI )
#define IS_AU_DELIMITER_NAL(t) ( (t) == NAL_UNIT_AU_DELIMITER )
#define IS_PREFIX_NAL(t) ( (t) == NAL_UNIT_PREFIX )
#define IS_SUBSET_SPS_USED(t) ( (t) == NAL_UNIT_SUBSET_SPS || (t) == NAL_UNIT_CODED_SLICE_EXT )
#define IS_VCL_NAL_AVC_BASE(t) ( (t) == NAL_UNIT_CODED_SLICE || (t) == NAL_UNIT_CODED_SLICE_IDR )
#define IS_NEW_INTRODUCED_SVC_NAL(t) ( (t) == NAL_UNIT_PREFIX || (t) == NAL_UNIT_CODED_SLICE_EXT )
/* Base slice types.
* An eSliceType value above 9 is invalid.
* A value greater than 4 has to be trimmed to its fixed slice type (eSliceType-4),
* i.e. the mapped version obtained after subtracting 4 from eSliceType.
* NOTE(review): the value pairs listed next to ESliceTypeExt (0/5, 1/6, 2/7) suggest
* an offset of 5 rather than 4 -- confirm against the code that performs the mapping.
*/
enum EWelsSliceType {
P_SLICE = 0,
B_SLICE = 1,
I_SLICE = 2,
SP_SLICE = 3,
SI_SLICE = 4,
UNKNOWN_SLICE = 5
};
/* Slice types in scalable extension; the trailing comments list the raw values that map to each entry */
enum ESliceTypeExt {
EP_SLICE = 0, // EP_SLICE: 0, 5
EB_SLICE = 1, // EB_SLICE: 1, 6
EI_SLICE = 2 // EI_SLICE: 2, 7
};
/* List Index */
enum EListIndex {
LIST_0 = 0,
LIST_1 = 1,
LIST_A = 2
};
/* Motion Vector components */
enum EMvComp {
MV_X = 0,
MV_Y = 1,
MV_A = 2
};
/* Chroma Components */
enum EChromaComp {
CHROMA_CB = 0,
CHROMA_CR = 1,
CHROMA_A = 2
};
/*
* Memory Management Control Operation (MMCO) code
*/
enum EMmcoCode {
MMCO_END = 0,
MMCO_SHORT2UNUSED = 1,
MMCO_LONG2UNUSED = 2,
MMCO_SHORT2LONG = 3,
MMCO_SET_MAX_LONG = 4,
MMCO_RESET = 5,
MMCO_LONG = 6
};
enum EVuiVideoFormat {
VUI_COMPONENT = 0,
VUI_PAL = 1,
VUI_NTSC = 2,
VUI_SECAM = 3,
VUI_MAC = 4,
VUI_UNSPECIFIED = 5,
VUI_RESERVED1 = 6,
VUI_RESERVED2 = 7
};
/*
* Bit-stream auxiliary reading / writing
*/
// Cursor state for reading from / writing to a bit-stream buffer.
typedef struct TagBitStringAux {
uint8_t* pStartBuf; // start of the buffer
uint8_t* pEndBuf; // end of the buffer (buffer + length)
int32_t iBits; // total number of bits in the input bitstream
intX_t iIndex; // only used by CAVLC
uint8_t* pCurBuf; // current reading position
uint32_t uiCurBits; // presumably the bits currently cached from pCurBuf -- TODO confirm
int32_t iLeftBits; // number of bits still available, in [1, 8];
// when no bit would be left, the pointer has to move to the start of the next byte and the count becomes 8
} SBitStringAux, *PBitStringAux;
/* NAL Unit Header in AVC, refer to Page 56 in JVT X201wcm */
typedef struct TagNalUnitHeader {
uint8_t uiForbiddenZeroBit;
uint8_t uiNalRefIdc; // NAL reference idc; see EWelsNalRefIdc for the named values
EWelsNalUnitType eNalUnitType; // NAL unit type
uint8_t uiReservedOneByte; // only padding usage
} SNalUnitHeader, *PNalUnitHeader;
/* NAL Unit Header in scalable extension syntax, refer to Page 390 in JVT X201wcm */
typedef struct TagNalUnitHeaderExt {
SNalUnitHeader sNalUnitHeader;
// uint8_t reserved_one_bit;
bool bIdrFlag;
uint8_t uiPriorityId;
int8_t iNoInterLayerPredFlag; // change as int8_t to support 3 values probably in encoder
uint8_t uiDependencyId;
uint8_t uiQualityId;
uint8_t uiTemporalId;
bool bUseRefBasePicFlag;
bool bDiscardableFlag;
bool bOutputFlag;
uint8_t uiReservedThree2Bits;
// Derived variable(s)
uint8_t uiLayerDqId;
bool bNalExtFlag;
} SNalUnitHeaderExt, *PNalUnitHeaderExt;
/* AVC MB types*/
#define MB_TYPE_INTRA4x4 0x00000001
#define MB_TYPE_INTRA16x16 0x00000002
#define MB_TYPE_INTRA8x8 0x00000004
#define MB_TYPE_16x16 0x00000008
#define MB_TYPE_16x8 0x00000010
#define MB_TYPE_8x16 0x00000020
#define MB_TYPE_8x8 0x00000040
#define MB_TYPE_8x8_REF0 0x00000080
#define MB_TYPE_SKIP 0x00000100
#define MB_TYPE_INTRA_PCM 0x00000200
#define MB_TYPE_INTRA_BL 0x00000400
#define MB_TYPE_DIRECT 0x00000800
#define MB_TYPE_P0L0 0x00001000
#define MB_TYPE_P1L0 0x00002000
#define MB_TYPE_P0L1 0x00004000
#define MB_TYPE_P1L1 0x00008000
#define MB_TYPE_L0 (MB_TYPE_P0L0 | MB_TYPE_P1L0)
#define MB_TYPE_L1 (MB_TYPE_P0L1 | MB_TYPE_P1L1)
#define SUB_MB_TYPE_8x8 0x00000001
#define SUB_MB_TYPE_8x4 0x00000002
#define SUB_MB_TYPE_4x8 0x00000004
#define SUB_MB_TYPE_4x4 0x00000008
#define MB_TYPE_INTRA (MB_TYPE_INTRA4x4 | MB_TYPE_INTRA16x16 | MB_TYPE_INTRA8x8 | MB_TYPE_INTRA_PCM)
#define MB_TYPE_INTER (MB_TYPE_16x16 | MB_TYPE_16x8 | MB_TYPE_8x16 | MB_TYPE_8x8 | MB_TYPE_8x8_REF0 | MB_TYPE_SKIP | MB_TYPE_DIRECT)
#define IS_INTRA4x4(type) ( MB_TYPE_INTRA4x4 == (type) )
#define IS_INTRA8x8(type) ( MB_TYPE_INTRA8x8 == (type) )
#define IS_INTRANxN(type) ( MB_TYPE_INTRA4x4 == (type) || MB_TYPE_INTRA8x8 == (type) )
#define IS_INTRA16x16(type) ( MB_TYPE_INTRA16x16 == (type) )
#define IS_INTRA(type) ( (type)&MB_TYPE_INTRA )
#define IS_INTER(type) ( (type)&MB_TYPE_INTER )
#define IS_INTER_16x16(type) ( (type)&MB_TYPE_16x16 )
#define IS_INTER_16x8(type) ( (type)&MB_TYPE_16x8 )
#define IS_INTER_8x16(type) ( (type)&MB_TYPE_8x16 )
#define IS_TYPE_L0(type) ( (type)&MB_TYPE_L0 )
#define IS_TYPE_L1(type) ( (type)&MB_TYPE_L1 )
#define IS_DIR(a, part, list) ((a) & (MB_TYPE_P0L0<<((part)+2*(list))))
#define IS_SKIP(type) ( ((type)&MB_TYPE_SKIP) != 0 )
#define IS_DIRECT(type) ( ((type)&MB_TYPE_DIRECT) != 0 )
#define IS_SVC_INTER(type) IS_INTER(type)
#define IS_I_BL(type) ( (type) == MB_TYPE_INTRA_BL )
#define IS_SVC_INTRA(type) ( IS_I_BL(type) || IS_INTRA(type) )
#define IS_Inter_8x8(type) ( ((type)&MB_TYPE_8x8) != 0)
#define IS_SUB_8x8(sub_type) (((sub_type)&SUB_MB_TYPE_8x8) != 0)
#define IS_SUB_8x4(sub_type) (((sub_type)&SUB_MB_TYPE_8x4) != 0)
#define IS_SUB_4x8(sub_type) (((sub_type)&SUB_MB_TYPE_4x8) != 0)
#define IS_SUB_4x4(sub_type) (((sub_type)&SUB_MB_TYPE_4x4) != 0)
#define REF_NOT_AVAIL -2
#define REF_NOT_IN_LIST -1 //intra
/////////intra16x16 Luma
#define I16_PRED_INVALID -1
#define I16_PRED_V 0
#define I16_PRED_H 1
#define I16_PRED_DC 2
#define I16_PRED_P 3
#define I16_PRED_DC_L 4
#define I16_PRED_DC_T 5
#define I16_PRED_DC_128 6
#define I16_PRED_DC_A 7
//////////intra4x4 Luma
// I8x8 prediction also uses these definitions
#define I4_PRED_INVALID 0
#define I4_PRED_V 0
#define I4_PRED_H 1
#define I4_PRED_DC 2
#define I4_PRED_DDL 3 //diagonal_down_left
#define I4_PRED_DDR 4 //diagonal_down_right
#define I4_PRED_VR 5 //vertical_right
#define I4_PRED_HD 6 //horizon_down
#define I4_PRED_VL 7 //vertical_left
#define I4_PRED_HU 8 //horizon_up
#define I4_PRED_DC_L 9
#define I4_PRED_DC_T 10
#define I4_PRED_DC_128 11
#define I4_PRED_DDL_TOP 12 //right-top replacing by padding rightmost pixel of top
#define I4_PRED_VL_TOP 13 //right-top replacing by padding rightmost pixel of top
#define I4_PRED_A 14
//////////intra Chroma
#define C_PRED_INVALID -1
#define C_PRED_DC 0
#define C_PRED_H 1
#define C_PRED_V 2
#define C_PRED_P 3
#define C_PRED_DC_L 4
#define C_PRED_DC_T 5
#define C_PRED_DC_128 6
#define C_PRED_A 7
}
#endif//WELS_COMMON_DEFS_H__

View File

@ -0,0 +1,63 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef WELS_CONST_COMMON_H__
#define WELS_CONST_COMMON_H__
// Miscellaneous sizing infos
#ifndef MAX_FNAME_LEN
#define MAX_FNAME_LEN 256 // maximal length of file name in char size
#endif//MAX_FNAME_LEN
#ifndef WELS_LOG_BUF_SIZE
#define WELS_LOG_BUF_SIZE 4096
#endif//WELS_LOG_BUF_SIZE
#ifndef MAX_TRACE_LOG_SIZE
#define MAX_TRACE_LOG_SIZE (50 * (1<<20)) // max trace log size: 50 MB, overwrite occur if log file size exceeds this size
#endif//MAX_TRACE_LOG_SIZE
/* MB width in pixels for specified colorspace I420 usually used in codec */
#define MB_WIDTH_LUMA 16
#define MB_WIDTH_CHROMA (MB_WIDTH_LUMA>>1)
/* MB height in pixels for specified colorspace I420 usually used in codec */
#define MB_HEIGHT_LUMA 16
#define MB_HEIGHT_CHROMA (MB_HEIGHT_LUMA>>1)
#define MB_COEFF_LIST_SIZE (256+((MB_WIDTH_CHROMA*MB_HEIGHT_CHROMA)<<1))
#define MB_PARTITION_SIZE 4 // Macroblock partition size in 8x8 sub-blocks
#define MB_BLOCK4x4_NUM 16
#define MB_BLOCK8x8_NUM 4
#define MAX_SPS_COUNT 32 // Count number of SPS
#define BASE_QUALITY_ID 0
#endif//WELS_CONST_COMMON_H__

View File

@ -0,0 +1,88 @@
/*!
* \copy
* Copyright (c) 2009-2015, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file WelsTaskThread.cpp
*
* \brief functions for TaskThread
*
* \date 5/09/2012 Created
*
*************************************************************************************
*/
#include "WelsTaskThread.h"
namespace WelsCommon {
// Builds a task thread that reports task start/stop events to pSink.
// Note that WelsThreadSetName() is invoked from the constructor, i.e. on the
// thread that creates this object.
CWelsTaskThread::CWelsTaskThread (IWelsTaskThreadSink* pSink) : m_pSink (pSink) {
  // No task is pending until SetTask() supplies one.
  m_pTask = NULL;
  // The object's own address doubles as its identifier.
  m_uiID = reinterpret_cast<uintptr_t> (this);
  WelsThreadSetName ("CWelsTaskThread");
}
// Nothing to release here: the body is intentionally empty.
CWelsTaskThread::~CWelsTaskThread() {
}
// Runs the pending task (if any) while holding m_cLockTask.
// The sink, when present, is notified before and after the task runs,
// even if no task is currently set. The task pointer is cleared afterwards.
void CWelsTaskThread::ExecuteTask() {
  CWelsAutoLock cTaskGuard (m_cLockTask);

  if (NULL != m_pSink) {
    m_pSink->OnTaskStart (this, m_pTask);
  }

  if (NULL != m_pTask) {
    m_pTask->Execute();
  }

  if (NULL != m_pSink) {
    m_pSink->OnTaskStop (this, m_pTask);
  }

  // The task has been consumed; forget it.
  m_pTask = NULL;
}
// Hands pTask to this thread and wakes it up.
// Returns WELS_THREAD_ERROR_GENERAL when the thread is not running,
// WELS_THREAD_ERROR_OK once the task has been stored and the thread signalled.
WELS_THREAD_ERROR_CODE CWelsTaskThread::SetTask (WelsCommon::IWelsTask* pTask) {
  CWelsAutoLock cTaskGuard (m_cLockTask);

  if (GetRunning()) {
    // Publish the task under the thread mutex, then signal the worker.
    WelsMutexLock (&m_hMutex);
    m_pTask = pTask;
    WelsMutexUnlock (&m_hMutex);
    SignalThread();
    return WELS_THREAD_ERROR_OK;
  }

  return WELS_THREAD_ERROR_GENERAL;
}
}

View File

@ -0,0 +1,126 @@
/*!
* \copy
* Copyright (c) 2009-2015, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file WelsThreadPool.cpp
*
* \brief functions for Thread Pool
*
* \date 5/09/2012 Created
*
*************************************************************************************
*/
#include "WelsThread.h"
namespace WelsCommon {
// Initialises the thread object in the "not running" state and creates the
// event and mutex that Thread() and Kill() later pass to WelsEventWait() /
// WelsEventSignal() together with m_iConVar.
CWelsThread::CWelsThread() :
  m_hThread (0),
  m_bRunning (false),
  m_bEndFlag (false) {
  m_iConVar = 1;
  WelsEventOpen (&m_hEvent);
  WelsMutexInit (&m_hMutex);
}
// Stops the worker first (Kill() signals the event and joins the thread),
// and only then releases the event and mutex the worker was using.
CWelsThread::~CWelsThread() {
Kill();
WelsEventClose (&m_hEvent);
WelsMutexDestroy(&m_hMutex);
}
// Worker loop: block on the event, leave when the end flag is set, otherwise
// run one task and wait again. Marks the thread as not running on exit.
void CWelsThread::Thread() {
  for (;;) {
    WelsEventWait (&m_hEvent, &m_hMutex, m_iConVar);

    if (!GetEndFlag()) {
      // m_iConVar must be reset BEFORE ExecuteTask(): ExecuteTask() triggers
      // OnTaskStop, which may already cause the signal for the next run.
      m_iConVar = 1;
      ExecuteTask();
      continue;
    }

    break;
  }

  SetRunning (false);
}
// Starts the worker thread if it is not running yet.
// Returns WELS_THREAD_ERROR_GENERAL when the event handle is NULL (checked on
// non-Apple builds only), the error from WelsThreadCreate() when creation
// fails, and WELS_THREAD_ERROR_OK otherwise. On success the call does not
// return before the worker has reported itself as running.
WELS_THREAD_ERROR_CODE CWelsThread::Start() {
#ifndef __APPLE__
  if (m_hEvent == NULL) {
    return WELS_THREAD_ERROR_GENERAL;
  }
#endif

  if (!GetRunning()) {
    SetEndFlag (false);

    const WELS_THREAD_ERROR_CODE kiCreateRet = WelsThreadCreate (&m_hThread,
        (LPWELS_THREAD_ROUTINE)TheThread, this, 0);
    if (kiCreateRet != WELS_THREAD_ERROR_OK) {
      return kiCreateRet;
    }

    // Poll in 1 ms steps until TheThread() has called SetRunning (true).
    while (!GetRunning()) {
      WelsSleep (1);
    }
  }

  return WELS_THREAD_ERROR_OK;
}
// Asks a running worker to terminate and waits for it to finish.
// Does nothing when the worker is not running.
void CWelsThread::Kill() {
  if (GetRunning()) {
    // Raise the end flag first so the worker sees it when the signal wakes it.
    SetEndFlag (true);
    WelsEventSignal (&m_hEvent, &m_hMutex, &m_iConVar);
    WelsThreadJoin (m_hThread);
  }
}
// Thread entry routine. pParam is the CWelsThread object passed as the
// argument to WelsThreadCreate() in Start().
WELS_THREAD_ROUTINE_TYPE CWelsThread::TheThread (void* pParam) {
  CWelsThread* const pSelf = static_cast<CWelsThread*> (pParam);

  // Start() polls this flag to learn that the worker is alive.
  pSelf->SetRunning (true);
  pSelf->Thread();

  WELS_THREAD_ROUTINE_RETURN (NULL);
}
}

View File

@ -0,0 +1,535 @@
/*!
* \copy
* Copyright (c) 2009-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file WelsThreadLib.c
*
* \brief Interfaces introduced in thread programming
*
* \date 11/17/2009 Created
*
*************************************************************************************
*/
#ifdef __linux__
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <sched.h>
#elif !defined(_WIN32) && !defined(__CYGWIN__)
#include <sys/types.h>
#include <sys/param.h>
#include <unistd.h>
#ifndef __Fuchsia__
#include <sys/sysctl.h>
#endif
#ifdef __APPLE__
#define HW_NCPU_NAME "hw.logicalcpu"
#else
#define HW_NCPU_NAME "hw.ncpu"
#endif
#endif
#ifdef ANDROID_NDK
#include <cpu-features.h>
#endif
#ifdef __ANDROID__
#include <android/api-level.h>
#endif
#include "WelsThreadLib.h"
#include <stdio.h>
#include <stdlib.h>
#if defined(_WIN32) || defined(__CYGWIN__)
// Windows / Cygwin mutex wrappers built on critical sections.
// Each wrapper returns WELS_THREAD_ERROR_OK unconditionally.

// Initialises the critical section behind *mutex.
WELS_THREAD_ERROR_CODE WelsMutexInit (WELS_MUTEX* mutex) {
InitializeCriticalSection (mutex);
return WELS_THREAD_ERROR_OK;
}
// Enters (locks) the critical section.
WELS_THREAD_ERROR_CODE WelsMutexLock (WELS_MUTEX* mutex) {
EnterCriticalSection (mutex);
return WELS_THREAD_ERROR_OK;
}
// Leaves (unlocks) the critical section.
WELS_THREAD_ERROR_CODE WelsMutexUnlock (WELS_MUTEX* mutex) {
LeaveCriticalSection (mutex);
return WELS_THREAD_ERROR_OK;
}
// Deletes the critical section.
WELS_THREAD_ERROR_CODE WelsMutexDestroy (WELS_MUTEX* mutex) {
DeleteCriticalSection (mutex);
return WELS_THREAD_ERROR_OK;
}
#else /* _WIN32 */
// pthread-based mutex wrappers (used when neither _WIN32 nor __CYGWIN__ is defined).
// Each wrapper returns the result of the underlying pthread call unchanged.

// Initialises *mutex with default attributes (NULL attribute pointer).
WELS_THREAD_ERROR_CODE WelsMutexInit (WELS_MUTEX* mutex) {
return pthread_mutex_init (mutex, NULL);
}
// Locks *mutex.
WELS_THREAD_ERROR_CODE WelsMutexLock (WELS_MUTEX* mutex) {
return pthread_mutex_lock (mutex);
}
// Unlocks *mutex.
WELS_THREAD_ERROR_CODE WelsMutexUnlock (WELS_MUTEX* mutex) {
return pthread_mutex_unlock (mutex);
}
// Destroys *mutex.
WELS_THREAD_ERROR_CODE WelsMutexDestroy (WELS_MUTEX* mutex) {
return pthread_mutex_destroy (mutex);
}
#endif /* !_WIN32 */
#if defined(_WIN32) || defined(__CYGWIN__)
// Creates an unnamed, auto-reset, initially non-signalled event and stores
// its handle in *event (NULL is stored when creation fails).
// event_name is not used on this platform.
// Returns WELS_THREAD_ERROR_OK on success, WELS_THREAD_ERROR_GENERAL otherwise.
WELS_THREAD_ERROR_CODE WelsEventOpen (WELS_EVENT* event, const char* event_name) {
  *event = CreateEvent (NULL, FALSE, FALSE, NULL);

  if (*event != NULL) {
    return WELS_THREAD_ERROR_OK;
  }

  return WELS_THREAD_ERROR_GENERAL;
}
// Decrements *iCondition and, once it has dropped to zero or below, sets the event.
// Returns WELS_THREAD_ERROR_OK only when the event was set successfully;
// WELS_THREAD_ERROR_GENERAL is returned both when SetEvent() fails and when
// the counter is still positive. pMutex is not used on this platform.
WELS_THREAD_ERROR_CODE WelsEventSignal (WELS_EVENT* event, WELS_MUTEX *pMutex, int* iCondition) {
  -- (*iCondition);

  if ((*iCondition) > 0) {
    // Counter not exhausted yet: nothing is signalled.
    return WELS_THREAD_ERROR_GENERAL;
  }

  if (!SetEvent (*event)) {
    return WELS_THREAD_ERROR_GENERAL;
  }

  return WELS_THREAD_ERROR_OK;
}
// Blocks without timeout until *event is signalled and returns the
// WaitForSingleObject() result. pMutex and iCondition are not used on this platform.
WELS_THREAD_ERROR_CODE WelsEventWait (WELS_EVENT* event, WELS_MUTEX* pMutex, int& iCondition) {
return WaitForSingleObject (*event, INFINITE);
}
// Waits for *event for at most dwMilliseconds and returns the
// WaitForSingleObject() result. pMutex is not used on this platform.
WELS_THREAD_ERROR_CODE WelsEventWaitWithTimeOut (WELS_EVENT* event, uint32_t dwMilliseconds, WELS_MUTEX* pMutex) {
return WaitForSingleObject (*event, dwMilliseconds);
}
// Blocks without timeout on the nCount events in event_list; the wait-all
// argument is FALSE, so a single signalled event ends the wait.
// Returns the WaitForMultipleObjects() result. master_even and pMutext are unused here.
WELS_THREAD_ERROR_CODE WelsMultipleEventsWaitSingleBlocking (uint32_t nCount,
WELS_EVENT* event_list, WELS_EVENT* master_even, WELS_MUTEX* pMutext) {
// The master event is not needed on Windows because WaitForMultipleObjects is available.
return WaitForMultipleObjects (nCount, event_list, FALSE, INFINITE);
}
// Closes the event handle and resets *event to NULL.
// event_name is not used on this platform. Always returns WELS_THREAD_ERROR_OK
// (the CloseHandle() result is not checked).
WELS_THREAD_ERROR_CODE WelsEventClose (WELS_EVENT* event, const char* event_name) {
CloseHandle (*event);
*event = NULL;
return WELS_THREAD_ERROR_OK;
}
// WelsSleep: suspends the calling thread for dwMilliSecond milliseconds.
// Two implementations are provided, selected by the WP80 macro.
#ifndef WP80
// Default implementation: forwards to the Win32 Sleep() call.
void WelsSleep (uint32_t dwMilliSecond) {
::Sleep (dwMilliSecond);
}
#else
// WP80 implementation: waits with a timeout on a shared event instead of calling Sleep().
// Nothing in this function signals the event.
void WelsSleep (uint32_t dwMilliSecond) {
static WELS_EVENT hSleepEvent = NULL;
if (!hSleepEvent) {
// First use: create a candidate event; give up silently (no sleep at all) if that fails.
WELS_EVENT hLocalSleepEvent = NULL;
WELS_THREAD_ERROR_CODE ret = WelsEventOpen (&hLocalSleepEvent);
if (WELS_THREAD_ERROR_OK != ret) {
return;
}
// Install the candidate only if hSleepEvent is still NULL; a non-NULL previous value
// means another caller installed its event first, so the local one is closed again.
WELS_EVENT hPreviousEvent = InterlockedCompareExchangePointerRelease (&hSleepEvent, hLocalSleepEvent, NULL);
if (hPreviousEvent) {
WelsEventClose (&hLocalSleepEvent);
}
// This singleton idiom based on InterlockedCompareExchangePointerRelease follows
// a similar example on the MSDN blog that introduces that function.
}
WaitForSingleObject (hSleepEvent, dwMilliSecond);
}
#endif
// Windows: starts a thread running routine(arg) with default security, stack
// size and creation flags. *thread is written only on success; attr is not used.
WELS_THREAD_ERROR_CODE WelsThreadCreate (WELS_THREAD_HANDLE* thread, LPWELS_THREAD_ROUTINE routine,
    void* arg, WELS_THREAD_ATTR attr) {
  WELS_THREAD_HANDLE hThread = CreateThread (NULL, 0, routine, arg, 0, NULL);
  if (NULL != hThread) {
    *thread = hThread;
    return WELS_THREAD_ERROR_OK;
  }
  return WELS_THREAD_ERROR_GENERAL;
}
// Windows: thread naming is not implemented; thread_name is ignored and the
// call always reports success.
WELS_THREAD_ERROR_CODE WelsThreadSetName (const char* thread_name) {
// do nothing
return WELS_THREAD_ERROR_OK;
}
// Windows: waits without timeout for the thread to finish, then closes its
// handle. The wait result is not inspected; the function always returns
// WELS_THREAD_ERROR_OK.
WELS_THREAD_ERROR_CODE WelsThreadJoin (WELS_THREAD_HANDLE thread) {
  (void) WaitForSingleObject (thread, INFINITE);
  (void) CloseHandle (thread);
  return WELS_THREAD_ERROR_OK;
}
// Windows: returns whatever GetCurrentThread() reports for the calling thread.
WELS_THREAD_HANDLE WelsThreadSelf() {
  WELS_THREAD_HANDLE hSelf = GetCurrentThread();
  return hSelf;
}
// Windows: fills pInfo->ProcessorCount with dwNumberOfProcessors as reported
// by GetSystemInfo(). Always returns WELS_THREAD_ERROR_OK.
WELS_THREAD_ERROR_CODE WelsQueryLogicalProcessInfo (WelsLogicalProcessInfo* pInfo) {
  SYSTEM_INFO sSystemInfo;
  GetSystemInfo (&sSystemInfo);
  pInfo->ProcessorCount = sSystemInfo.dwNumberOfProcessors;
  return WELS_THREAD_ERROR_OK;
}
#else //platform: #ifdef _WIN32
/*!
 * \brief   Create a thread running routine(arg) (pthread implementation).
 *
 * Outside Android and Fuchsia the thread attributes request system contention
 * scope and the SCHED_FIFO policy before the thread is created.
 *
 * \param   thread   receives the handle of the new thread
 * \param   routine  thread entry point
 * \param   arg      opaque argument passed to routine
 * \param   attr     not used by this implementation
 * \return  0 on success, otherwise the error code of the first pthread call
 *          that failed
 */
WELS_THREAD_ERROR_CODE WelsThreadCreate (WELS_THREAD_HANDLE* thread, LPWELS_THREAD_ROUTINE routine,
    void* arg, WELS_THREAD_ATTR attr) {
  pthread_attr_t at;
  WELS_THREAD_ERROR_CODE err = pthread_attr_init (&at);
  if (err)
    return err; // nothing was initialised, so there is nothing to destroy
#if !defined(__ANDROID__) && !defined(__Fuchsia__)
  err = pthread_attr_setscope (&at, PTHREAD_SCOPE_SYSTEM);
  if (!err)
    err = pthread_attr_setschedpolicy (&at, SCHED_FIFO);
#endif
  if (!err)
    err = pthread_create (thread, &at, routine, arg);
  // Release the attribute object on every path that initialised it; the
  // failure paths used to return without destroying it.
  pthread_attr_destroy (&at);
  return err;
}
// Names the calling thread where a pthread_setname_np() variant is enabled:
// the one-argument form under APPLE_IOS, the (thread, name) form on Android
// API level 9 and above. On every other configuration nothing happens.
// The result of pthread_setname_np() is ignored; the function always returns
// WELS_THREAD_ERROR_OK.
WELS_THREAD_ERROR_CODE WelsThreadSetName (const char* thread_name) {
#ifdef APPLE_IOS
pthread_setname_np (thread_name);
#endif
#if defined(__ANDROID__) && __ANDROID_API__ >= 9
pthread_setname_np (pthread_self(), thread_name);
#endif
// do nothing
return WELS_THREAD_ERROR_OK;
}
// Waits for the thread to terminate, discarding its exit value, and returns
// the pthread_join() result.
WELS_THREAD_ERROR_CODE WelsThreadJoin (WELS_THREAD_HANDLE thread) {
  const WELS_THREAD_ERROR_CODE iJoinResult = pthread_join (thread, NULL);
  return iJoinResult;
}
// Returns the pthread handle of the calling thread.
WELS_THREAD_HANDLE WelsThreadSelf() {
  WELS_THREAD_HANDLE hSelf = pthread_self();
  return hSelf;
}
// Creates an event object; event_name is not used.
//  - __APPLE__: unnamed semaphores aren't supported on OS X, so the event is a
//    condition variable initialised in place in *p_event.
//  - elsewhere: the event is a heap-allocated unnamed semaphore, not shared
//    between processes, with an initial count of 0. On any failure *p_event is
//    set to NULL and a non-zero code is returned.
WELS_THREAD_ERROR_CODE WelsEventOpen (WELS_EVENT* p_event, const char* event_name) {
#ifdef __APPLE__
  return pthread_cond_init (p_event, NULL);
#else
  WELS_EVENT pSem = (WELS_EVENT) malloc (sizeof (*pSem));
  if (NULL == pSem) {
    *p_event = NULL;
    return WELS_THREAD_ERROR_GENERAL;
  }
  const WELS_THREAD_ERROR_CODE iRet = sem_init (pSem, 0, 0);
  if (iRet) {
    // initialisation failed: give the storage back and publish NULL instead
    free (pSem);
    pSem = NULL;
  }
  *p_event = pSem;
  return iRet;
#endif
}
// Destroys an event created by WelsEventOpen(); event_name is not used.
//  - __APPLE__: destroys the condition variable.
//  - elsewhere: destroys the semaphore (the counterpart of sem_init), frees its
//    storage and resets *event to NULL. The sem_destroy() result is returned.
WELS_THREAD_ERROR_CODE WelsEventClose (WELS_EVENT* event, const char* event_name) {
#ifdef __APPLE__
  return pthread_cond_destroy (event);
#else
  WELS_EVENT pSem = *event;
  const WELS_THREAD_ERROR_CODE iRet = sem_destroy (pSem);
  free (pSem);
  *event = NULL;
  return iRet;
#endif
}
// Suspends the calling thread for dwMilliSecond milliseconds via usleep().
// The conversion to microseconds is a plain multiplication by 1000.
void WelsSleep (uint32_t dwMilliSecond) {
  const uint32_t kuiMicroPerMilli = 1000;
  usleep (dwMilliSecond * kuiMicroPerMilli);
}
// Decrements *iCondition and signals the event once the counter has dropped to
// zero or below. Returns 0 when no signal was due, otherwise the result of the
// signalling call.
//  - __APPLE__: the decrement happens under pMutex and the event is a
//    condition variable (pthread_cond_signal).
//  - elsewhere: pMutex is not used and the event is a semaphore (sem_post);
//    the post is skipped when the event pointer itself is NULL.
WELS_THREAD_ERROR_CODE WelsEventSignal (WELS_EVENT* event, WELS_MUTEX* pMutex, int* iCondition) {
  WELS_THREAD_ERROR_CODE iRet = 0;
#ifdef __APPLE__
  WelsMutexLock (pMutex);
  -- (*iCondition);
  WelsMutexUnlock (pMutex);
  if ((*iCondition) <= 0) {
    iRet = pthread_cond_signal (event);
  }
#else
  -- (*iCondition);
  if ((*iCondition) <= 0 && event != NULL) {
    iRet = sem_post (*event);
  }
#endif
  return iRet;
}
// Blocks until the event is signalled.
//  - __APPLE__: holds pMutex and waits on the condition variable for as long
//    as iCondition stays above zero; returns the result of the last wait
//    (0 if no wait was needed).
//  - elsewhere: a plain sem_wait(); pMutex and iCondition are not used.
WELS_THREAD_ERROR_CODE WelsEventWait (WELS_EVENT* event, WELS_MUTEX* pMutex, int& iCondition) {
#ifdef __APPLE__
  int iRet = 0;
  WelsMutexLock (pMutex);
  while (0 < iCondition) {
    iRet = pthread_cond_wait (event, pMutex);
  }
  WelsMutexUnlock (pMutex);
  return iRet;
#else
  const WELS_THREAD_ERROR_CODE iRet = sem_wait (*event); // blocks until posted
  return iRet;
#endif
}
/*!
 * \brief   Wait for an event, giving up after dwMilliseconds.
 *
 * Passing (uint32_t)-1 requests an unbounded wait. Any other value is turned
 * into an absolute wall-clock deadline for the timed wait.
 *
 * Fixes over the previous version:
 *  - the sentinel test was inverted, so finite timeouts blocked forever and
 *    the "infinite" sentinel took the timed path;
 *  - the deadline was computed with dwMilliseconds * 1000000 in 32-bit
 *    arithmetic, which wraps for timeouts above roughly 4.29 seconds.
 *
 * \param   event           event to wait on (condition variable on __APPLE__,
 *                          semaphore elsewhere)
 * \param   dwMilliseconds  timeout in milliseconds, or (uint32_t)-1 for none
 * \param   pMutex          mutex paired with the condition variable; only used
 *                          on __APPLE__
 * \return  result of the underlying wait call
 */
WELS_THREAD_ERROR_CODE WelsEventWaitWithTimeOut (WELS_EVENT* event, uint32_t dwMilliseconds, WELS_MUTEX* pMutex) {
  if (dwMilliseconds == (uint32_t) - 1) {
    // no timeout requested: plain blocking wait
#if defined(__APPLE__)
    return pthread_cond_wait (event, pMutex);
#else
    return sem_wait (*event);
#endif
  }

  // The timed waits take an absolute deadline, so start from the current time.
  struct timeval tv;
  gettimeofday (&tv, 0);

  // Whole seconds of the timeout go straight into tv_sec; only the sub-second
  // remainder is added to the nanosecond part, which therefore stays far below
  // the 64-bit range and carries at most one extra second.
  const int64_t iNanoSec = (int64_t) tv.tv_usec * 1000 + (int64_t) (dwMilliseconds % 1000) * 1000000;
  struct timespec ts;
  ts.tv_sec = tv.tv_sec + (time_t) (dwMilliseconds / 1000) + (time_t) (iNanoSec / 1000000000);
  ts.tv_nsec = (long) (iNanoSec % 1000000000);

#if defined(__APPLE__)
  return pthread_cond_timedwait (event, pMutex, &ts);
#else
  return sem_timedwait (*event, &ts);
#endif
}
// Waits until one of the nCount events in event_list is signalled and returns
// WELS_THREAD_ERROR_WAIT_OBJECT_0 + <index of that event>.
//
// nCount == 0 is rejected with WELS_THREAD_ERROR_WAIT_FAILED.
// When master_event is non-NULL the function first blocks on it (returning the
// error if that wait fails) and then scans the list without pausing between
// entries; when it is NULL each entry that is not ready is retried once after
// a 2 us pause before moving on. The scan restarts from index 0 until some
// event is found, so the trailing return after the loops is never reached.
//
// The two platform branches are structurally identical; they differ only in
// the primitive used (condition variables on __APPLE__, semaphores elsewhere).
// NOTE(review): the __APPLE__ scan uses the blocking pthread_cond_wait() where
// the semaphore branch uses the non-blocking sem_trywait() — confirm this is
// intended.
WELS_THREAD_ERROR_CODE WelsMultipleEventsWaitSingleBlocking (uint32_t nCount,
WELS_EVENT* event_list, WELS_EVENT* master_event, WELS_MUTEX* pMutex) {
uint32_t nIdx = 0;
uint32_t uiAccessTime = 2; // 2 us once
if (nCount == 0)
return WELS_THREAD_ERROR_WAIT_FAILED;
#if defined(__APPLE__)
if (master_event != NULL) {
// This design relies on the events actually being semaphores;
// if multiple events in the list have been signalled, the master
// event should have a similar count (events in windows can't keep
// track of the actual count, but the master event isn't needed there
// since it uses WaitForMultipleObjects).
int32_t err = pthread_cond_wait (master_event, pMutex);
if (err != WELS_THREAD_ERROR_OK)
return err;
uiAccessTime = 0; // no blocking, just quickly loop through all to find the one that was signalled
}
while (1) {
nIdx = 0; // access each event by order
while (nIdx < nCount) {
int32_t err = 0;
int32_t wait_count = 0;
/*
* although such interface is not used in __GNUC__ like platform, to use
* pthread_cond_timedwait() might be better choice if need
*/
do {
err = pthread_cond_wait (&event_list[nIdx], pMutex);
if (WELS_THREAD_ERROR_OK == err)
return WELS_THREAD_ERROR_WAIT_OBJECT_0 + nIdx;
else if (wait_count > 0 || uiAccessTime == 0)
break;
usleep (uiAccessTime);
++ wait_count;
} while (1);
// we do need access next event next time
++ nIdx;
}
usleep (1); // switch to working threads
if (master_event != NULL) {
// A master event was used and was signalled, but none of the events in the
// list was found to be signalled, thus wait a little more when rechecking
// the list to avoid busylooping here.
// If we ever hit this codepath it's mostly a bug in the code that signals
// the events.
uiAccessTime = 2;
}
}
#else
if (master_event != NULL) {
// This design relies on the events actually being semaphores;
// if multiple events in the list have been signalled, the master
// event should have a similar count (events in windows can't keep
// track of the actual count, but the master event isn't needed there
// since it uses WaitForMultipleObjects).
int32_t err = sem_wait (*master_event);
if (err != WELS_THREAD_ERROR_OK)
return err;
uiAccessTime = 0; // no blocking, just quickly loop through all to find the one that was signalled
}
while (1) {
nIdx = 0; // access each event by order
while (nIdx < nCount) {
int32_t err = 0;
int32_t wait_count = 0;
/*
* although such interface is not used in __GNUC__ like platform, to use
* pthread_cond_timedwait() might be better choice if need
*/
do {
// non-blocking probe: succeeds only if this event has already been posted
err = sem_trywait (event_list[nIdx]);
if (WELS_THREAD_ERROR_OK == err)
return WELS_THREAD_ERROR_WAIT_OBJECT_0 + nIdx;
else if (wait_count > 0 || uiAccessTime == 0)
break;
usleep (uiAccessTime);
++ wait_count;
} while (1);
// we do need access next event next time
++ nIdx;
}
usleep (1); // switch to working threads
if (master_event != NULL) {
// A master event was used and was signalled, but none of the events in the
// list was found to be signalled, thus wait a little more when rechecking
// the list to avoid busylooping here.
// If we ever hit this codepath it's mostly a bug in the code that signals
// the events.
uiAccessTime = 2;
}
}
#endif
return WELS_THREAD_ERROR_WAIT_FAILED;
}
// Stores the number of usable processors in pInfo->ProcessorCount and always
// returns WELS_THREAD_ERROR_OK. The source of the number depends on the build:
//  - ANDROID_NDK:  android_getCpuCount()
//  - __linux__:    number of CPUs in the calling thread's affinity mask
//                  (1 if the mask cannot be read)
//  - __EMSCRIPTEN__: fixed to 1
//  - __Fuchsia__:  sysconf(_SC_NPROCESSORS_ONLN)
//  - otherwise:    sysctl (OpenBSD) or sysctlbyname(HW_NCPU_NAME), falling
//                  back to 1 when the query fails
WELS_THREAD_ERROR_CODE WelsQueryLogicalProcessInfo (WelsLogicalProcessInfo* pInfo) {
#ifdef ANDROID_NDK
pInfo->ProcessorCount = android_getCpuCount();
return WELS_THREAD_ERROR_OK;
#elif defined(__linux__)
cpu_set_t cpuset;
CPU_ZERO (&cpuset);
if (!sched_getaffinity (0, sizeof (cpuset), &cpuset)) {
#ifdef CPU_COUNT
pInfo->ProcessorCount = CPU_COUNT (&cpuset);
#else
// CPU_COUNT is unavailable: count the set bits by hand
int32_t count = 0;
for (int i = 0; i < CPU_SETSIZE; i++) {
if (CPU_ISSET (i, &cpuset)) {
count++;
}
}
pInfo->ProcessorCount = count;
#endif
} else {
pInfo->ProcessorCount = 1;
}
return WELS_THREAD_ERROR_OK;
#elif defined(__EMSCRIPTEN__)
// There is not yet a way to determine CPU count in emscripten JS environment.
pInfo->ProcessorCount = 1;
return WELS_THREAD_ERROR_OK;
#elif defined(__Fuchsia__)
pInfo->ProcessorCount = sysconf(_SC_NPROCESSORS_ONLN);
return WELS_THREAD_ERROR_OK;
#else
size_t len = sizeof (pInfo->ProcessorCount);
// Exactly one of the two `if` lines below is compiled; the assignment that
// follows the #endif is the body of whichever one is selected.
#if defined(__OpenBSD__)
int scname[] = { CTL_HW, HW_NCPU };
if (sysctl (scname, 2, &pInfo->ProcessorCount, &len, NULL, 0) == -1)
#else
if (sysctlbyname (HW_NCPU_NAME, &pInfo->ProcessorCount, &len, NULL, 0) == -1)
#endif
pInfo->ProcessorCount = 1;
return WELS_THREAD_ERROR_OK;
#endif//__linux__
}
#endif

View File

@ -0,0 +1,380 @@
/*!
* \copy
* Copyright (c) 2009-2015, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file WelsThreadPool.cpp
*
* \brief functions for Thread Pool
*
* \date 5/09/2012 Created
*
*************************************************************************************
*/
#include "typedefs.h"
#include "memory_align.h"
#include "WelsThreadPool.h"
namespace WelsCommon {
namespace {
// Returns the lock that guards the pool's static state (reference count,
// thread-number setting and singleton pointer). The lock object is allocated
// on first use and never deleted.
CWelsLock& GetInitLock() {
  static CWelsLock* const s_pInitLock = new CWelsLock;
  return *s_pInitLock;
}
}
// Shared pool state; the functions in this file that read or modify it take
// GetInitLock() first.
// Number of outstanding AddReference() calls not yet matched by RemoveInstance().
int32_t CWelsThreadPool::m_iRefCount = 0;
// Number of worker threads Init() creates; changeable through SetThreadNum().
int32_t CWelsThreadPool::m_iMaxThreadNum = DEFAULT_THREAD_NUM;
// The single pool instance, created lazily by AddReference().
CWelsThreadPool* CWelsThreadPool::m_pThreadPoolSelf = NULL;
// Leaves all three containers unallocated; Init() creates them.
CWelsThreadPool::CWelsThreadPool()
  : m_cWaitedTasks (NULL),
    m_cIdleThreads (NULL),
    m_cBusyThreads (NULL) {
}
// If references are still outstanding when the pool is destroyed, the count is
// forced to zero and the pool is torn down here; otherwise there is nothing
// left to do.
CWelsThreadPool::~CWelsThreadPool() {
  if (0 == m_iRefCount) {
    return;
  }
  m_iRefCount = 0;
  Uninit();
}
// Sets how many worker threads the pool will create. Only allowed while the
// pool is unreferenced; otherwise WELS_THREAD_ERROR_GENERAL is returned.
// Non-positive requests are clamped to 1.
WELS_THREAD_ERROR_CODE CWelsThreadPool::SetThreadNum (int32_t iMaxThreadNum) {
  CWelsAutoLock cLock (GetInitLock());
  if (0 != m_iRefCount) {
    return WELS_THREAD_ERROR_GENERAL;
  }
  m_iMaxThreadNum = (iMaxThreadNum > 0) ? iMaxThreadNum : 1;
  return WELS_THREAD_ERROR_OK;
}
// Returns the shared pool and takes one reference on it. The instance is
// created on demand and initialised when the reference count is zero; if
// initialisation fails the instance is torn down again and NULL is returned.
CWelsThreadPool* CWelsThreadPool::AddReference() {
  CWelsAutoLock cLock (GetInitLock());
  if (NULL == m_pThreadPoolSelf) {
    m_pThreadPoolSelf = new CWelsThreadPool();
    if (NULL == m_pThreadPoolSelf) {
      return NULL;
    }
  }
  if (0 == m_iRefCount && WELS_THREAD_ERROR_OK != m_pThreadPoolSelf->Init()) {
    m_pThreadPoolSelf->Uninit();
    delete m_pThreadPoolSelf;
    m_pThreadPoolSelf = NULL;
    return NULL;
  }
  ++ m_iRefCount;
  return m_pThreadPoolSelf;
}
// Drops one reference. When the count reaches zero the pool stops its work,
// releases its resources and deletes the shared instance.
void CWelsThreadPool::RemoveInstance() {
  CWelsAutoLock cLock (GetInitLock());
  -- m_iRefCount;
  if (0 != m_iRefCount) {
    return;
  }
  StopAllRunning();
  Uninit();
  if (m_pThreadPoolSelf) {
    delete m_pThreadPoolSelf;
    m_pThreadPoolSelf = NULL;
  }
}
// True while at least one reference to the pool is outstanding.
bool CWelsThreadPool::IsReferenced() {
  CWelsAutoLock cLock (GetInitLock());
  const bool bReferenced = (m_iRefCount > 0);
  return bReferenced;
}
// Callback for a worker starting a task: records the worker as busy.
// pTask is not used. Always returns WELS_THREAD_ERROR_OK.
WELS_THREAD_ERROR_CODE CWelsThreadPool::OnTaskStart (CWelsTaskThread* pThread, IWelsTask* pTask) {
  (void) AddThreadToBusyList (pThread);
  return WELS_THREAD_ERROR_OK;
}
// Callback for a worker finishing a task: moves the worker from the busy list
// back to the idle queue, notifies the task's sink (if it has one) that the
// task was executed, and then signals the pool's own thread.
// Always returns WELS_THREAD_ERROR_OK.
WELS_THREAD_ERROR_CODE CWelsThreadPool::OnTaskStop (CWelsTaskThread* pThread, IWelsTask* pTask) {
  RemoveThreadFromBusyList (pThread);
  AddThreadToIdleQueue (pThread);
  if (pTask) {
    if (pTask->GetSink()) {
      pTask->GetSink()->OnTaskExecuted();
    }
  }
  SignalThread();
  return WELS_THREAD_ERROR_OK;
}
// Allocates the task/thread containers, creates m_iMaxThreadNum idle workers
// and starts the pool's own thread. Returns WELS_THREAD_ERROR_GENERAL as soon
// as any step fails; cleanup of a partially initialised pool is left to the
// caller.
WELS_THREAD_ERROR_CODE CWelsThreadPool::Init() {
  CWelsAutoLock cLock (m_cLockPool);
  m_cWaitedTasks = new CWelsNonDuplicatedList<IWelsTask>();
  m_cIdleThreads = new CWelsNonDuplicatedList<CWelsTaskThread>();
  m_cBusyThreads = new CWelsList<CWelsTaskThread>();
  const bool bAllocated = (NULL != m_cWaitedTasks) && (NULL != m_cIdleThreads) && (NULL != m_cBusyThreads);
  if (!bAllocated) {
    return WELS_THREAD_ERROR_GENERAL;
  }
  int32_t iCreated = 0;
  while (iCreated < m_iMaxThreadNum) {
    if (WELS_THREAD_ERROR_OK != CreateIdleThread()) {
      return WELS_THREAD_ERROR_GENERAL;
    }
    ++ iCreated;
  }
  if (WELS_THREAD_ERROR_OK == Start()) {
    return WELS_THREAD_ERROR_OK;
  }
  return WELS_THREAD_ERROR_GENERAL;
}
// Cancels every queued task, then polls in 10 ms steps until no worker is
// busy. Reports WELS_THREAD_ERROR_GENERAL if the idle queue does not end up
// holding exactly m_iMaxThreadNum workers.
WELS_THREAD_ERROR_CODE CWelsThreadPool::StopAllRunning() {
  ClearWaitedTasks();
  while (GetBusyThreadNum() > 0) {
    WelsSleep (10);
  }
  if (GetIdleThreadNum() == m_iMaxThreadNum) {
    return WELS_THREAD_ERROR_OK;
  }
  return WELS_THREAD_ERROR_GENERAL;
}
// Tears the pool down: stops outstanding work, destroys every idle worker,
// stops the pool's own thread and frees the three containers.
// If StopAllRunning() reports a failure, its code is returned immediately and
// nothing is destroyed or freed.
WELS_THREAD_ERROR_CODE CWelsThreadPool::Uninit() {
  CWelsAutoLock cLock (m_cLockPool);
  const WELS_THREAD_ERROR_CODE iStopResult = StopAllRunning();
  if (WELS_THREAD_ERROR_OK != iStopResult) {
    return iStopResult;
  }

  m_cLockIdleTasks.Lock();
  // destroy the front worker, then drop it from the queue, until none is left
  for (; m_cIdleThreads->size() > 0; m_cIdleThreads->pop_front()) {
    DestroyThread (m_cIdleThreads->begin());
  }
  m_cLockIdleTasks.Unlock();

  Kill();
  WELS_DELETE_OP (m_cWaitedTasks);
  WELS_DELETE_OP (m_cIdleThreads);
  WELS_DELETE_OP (m_cBusyThreads);
  return iStopResult;
}
void CWelsThreadPool::ExecuteTask() {
//fprintf(stdout, "ThreadPool: scheduled tasks: ExecuteTask\n");
CWelsTaskThread* pThread = NULL;
IWelsTask* pTask = NULL;
while (GetWaitedTaskNum() > 0) {
//fprintf(stdout, "ThreadPool: ExecuteTask: waiting task %d\n", GetWaitedTaskNum());
pThread = GetIdleThread();
if (pThread == NULL) {
//fprintf(stdout, "ThreadPool: ExecuteTask: no IdleThread\n");
break;
}
pTask = GetWaitedTask();
//fprintf(stdout, "ThreadPool: ExecuteTask = %x at thread %x\n", pTask, pThread);
if (pTask) {
pThread->SetTask (pTask);
} else {
AddThreadToIdleQueue (pThread);
}
}
}
// Submits a task. When nothing is queued and a worker is idle, the task goes
// straight to that worker. Otherwise it is appended to the waiting list and
// the pool's own thread is signalled; WELS_THREAD_ERROR_GENERAL is returned if
// the list refuses the task.
WELS_THREAD_ERROR_CODE CWelsThreadPool::QueueTask (IWelsTask* pTask) {
  CWelsAutoLock cLock (m_cLockPool);
  if (0 == GetWaitedTaskNum()) {
    CWelsTaskThread* pIdle = GetIdleThread();
    if (NULL != pIdle) {
      pIdle->SetTask (pTask);
      return WELS_THREAD_ERROR_OK;
    }
  }
  if (!AddTaskToWaitedList (pTask)) {
    return WELS_THREAD_ERROR_GENERAL;
  }
  SignalThread();
  return WELS_THREAD_ERROR_OK;
}
// Creates one worker thread, starts it and places it on the idle queue.
// Returns WELS_THREAD_ERROR_GENERAL if the worker cannot be allocated or
// started, WELS_THREAD_ERROR_OK otherwise.
WELS_THREAD_ERROR_CODE CWelsThreadPool::CreateIdleThread() {
  CWelsTaskThread* pThread = new CWelsTaskThread (this);
  if (NULL == pThread) {
    return WELS_THREAD_ERROR_GENERAL;
  }
  if (WELS_THREAD_ERROR_OK != pThread->Start()) {
    // The worker never reached the idle queue, so no later cleanup would find
    // it; dispose of it here instead of leaking it.
    DestroyThread (pThread);
    return WELS_THREAD_ERROR_GENERAL;
  }
  AddThreadToIdleQueue (pThread);
  return WELS_THREAD_ERROR_OK;
}
// Stops the given worker and deletes it.
void CWelsThreadPool::DestroyThread (CWelsTaskThread* pThread) {
  pThread->Kill();
  WELS_DELETE_OP (pThread);
}
// Appends the worker to the idle queue under the idle-queue lock.
// The container's result is ignored; always returns WELS_THREAD_ERROR_OK.
WELS_THREAD_ERROR_CODE CWelsThreadPool::AddThreadToIdleQueue (CWelsTaskThread* pThread) {
  CWelsAutoLock cLock (m_cLockIdleTasks);
  (void) m_cIdleThreads->push_back (pThread);
  return WELS_THREAD_ERROR_OK;
}
// Appends the worker to the busy list under the busy-list lock.
// The container's result is ignored; always returns WELS_THREAD_ERROR_OK.
WELS_THREAD_ERROR_CODE CWelsThreadPool::AddThreadToBusyList (CWelsTaskThread* pThread) {
  CWelsAutoLock cLock (m_cLockBusyTasks);
  (void) m_cBusyThreads->push_back (pThread);
  return WELS_THREAD_ERROR_OK;
}
// Removes the worker from the busy list under the busy-list lock.
// Returns WELS_THREAD_ERROR_GENERAL when the list reports that nothing was
// erased.
WELS_THREAD_ERROR_CODE CWelsThreadPool::RemoveThreadFromBusyList (CWelsTaskThread* pThread) {
  CWelsAutoLock cLock (m_cLockBusyTasks);
  if (!m_cBusyThreads->erase (pThread)) {
    return WELS_THREAD_ERROR_GENERAL;
  }
  return WELS_THREAD_ERROR_OK;
}
// Appends the task to the waiting list under the waiting-list lock and passes
// the list's own result back to the caller.
bool CWelsThreadPool::AddTaskToWaitedList (IWelsTask* pTask) {
  CWelsAutoLock cLock (m_cLockWaitedTasks);
  const bool bQueued = m_cWaitedTasks->push_back (pTask);
  return bQueued;
}
// Takes the worker at the front of the idle queue, or returns NULL when the
// queue does not exist or is empty.
CWelsTaskThread* CWelsThreadPool::GetIdleThread() {
  CWelsAutoLock cLock (m_cLockIdleTasks);
  const bool bNoneIdle = (NULL == m_cIdleThreads) || (0 == m_cIdleThreads->size());
  if (bNoneIdle) {
    return NULL;
  }
  CWelsTaskThread* pFront = m_cIdleThreads->begin();
  m_cIdleThreads->pop_front();
  return pFront;
}
// Number of workers on the busy list; 0 when the list has not been allocated.
int32_t CWelsThreadPool::GetBusyThreadNum() {
  if (!m_cBusyThreads) {
    return 0;
  }
  return m_cBusyThreads->size();
}
// Number of workers on the idle queue; 0 when the queue has not been allocated.
int32_t CWelsThreadPool::GetIdleThreadNum() {
  if (!m_cIdleThreads) {
    return 0;
  }
  return m_cIdleThreads->size();
}
// Number of tasks on the waiting list; 0 when the list has not been allocated.
int32_t CWelsThreadPool::GetWaitedTaskNum() {
  if (!m_cWaitedTasks) {
    return 0;
  }
  return m_cWaitedTasks->size();
}
// Takes the task at the front of the waiting list, or returns NULL when the
// list does not exist or is empty.
IWelsTask* CWelsThreadPool::GetWaitedTask() {
  CWelsAutoLock lock (m_cLockWaitedTasks);
  const bool bNothingQueued = (NULL == m_cWaitedTasks) || (0 == m_cWaitedTasks->size());
  if (bNothingQueued) {
    return NULL;
  }
  IWelsTask* pFront = m_cWaitedTasks->begin();
  m_cWaitedTasks->pop_front();
  return pFront;
}
void CWelsThreadPool::ClearWaitedTasks() {
CWelsAutoLock cLock (m_cLockWaitedTasks);
if (NULL == m_cWaitedTasks) {
return;
}
IWelsTask* pTask = NULL;
while (0 != m_cWaitedTasks->size()) {
pTask = m_cWaitedTasks->begin();
if (pTask->GetSink()) {
pTask->GetSink()->OnTaskCancelled();
}
m_cWaitedTasks->pop_front();
}
}
}

View File

@ -0,0 +1,906 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#include "wels_common_defs.h"
namespace WelsCommon {
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Scan-index mapping for pNonZeroCount[16+8].
// Each row below lists four raster positions that form one 2x2 group:
// indices 0..15 lie on a 4-wide luma grid, 16..23 on the two chroma rows that
// follow it.
//
//   raster layout            groups (one table row each)
//    0  1  2  3               0  1 |  4  5
//    4  5  6  7               2  3 |  6  7
//    8  9 10 11              ------+------
//   12 13 14 15               8  9 | 12 13
//   16 17 18 19              10 11 | 14 15
//   20 21 22 23              ------+------
//                            16 17 | 20 21
//                            18 19 | 22 23
const uint8_t g_kuiMbCountScan4Idx[24] = {
  // luma
   0,  1,  4,  5,
   2,  3,  6,  7,
   8,  9, 12, 13,
  10, 11, 14, 15,
  // chroma
  16, 17, 20, 21,
  18, 19, 22, 23
};
// Scan-index mapping into a 48-element cache laid out 8 entries per row.
// Every entry is written as column + row * 8 so the position in the cache can
// be read off directly.
const uint8_t g_kuiCache48CountScan4Idx[24] = {
  /* Luma: columns 1..4, rows 1..4 */
  1 + 1 * 8, 2 + 1 * 8, 1 + 2 * 8, 2 + 2 * 8,   //  9, 10, 17, 18
  3 + 1 * 8, 4 + 1 * 8, 3 + 2 * 8, 4 + 2 * 8,   // 11, 12, 19, 20
  1 + 3 * 8, 2 + 3 * 8, 1 + 4 * 8, 2 + 4 * 8,   // 25, 26, 33, 34
  3 + 3 * 8, 4 + 3 * 8, 3 + 4 * 8, 4 + 4 * 8,   // 27, 28, 35, 36
  /* Cb: columns 6..7, rows 1..2 */
  6 + 1 * 8, 7 + 1 * 8,                         // 14, 15
  6 + 2 * 8, 7 + 2 * 8,                         // 22, 23
  /* Cr: columns 6..7, rows 4..5 */
  6 + 4 * 8, 7 + 4 * 8,                         // 38, 39
  6 + 5 * 8, 7 + 5 * 8,                         // 46, 47
};
// Six 8x8 coefficient tables, generated from equations 8-317 and 8-318 as the
// original annotation states. Within each table rows 0 and 4 are identical, as
// are rows 1, 3, 5 and 7, and rows 2 and 6.
// NOTE(review): the first index is presumably QP % 6 — confirm against the
// code that reads this table.
const uint8_t g_kuiMatrixV[6][8][8] = { // generated from equation 8-317, 8-318
{
{20, 19, 25, 19, 20, 19, 25, 19},
{19, 18, 24, 18, 19, 18, 24, 18},
{25, 24, 32, 24, 25, 24, 32, 24},
{19, 18, 24, 18, 19, 18, 24, 18},
{20, 19, 25, 19, 20, 19, 25, 19},
{19, 18, 24, 18, 19, 18, 24, 18},
{25, 24, 32, 24, 25, 24, 32, 24},
{19, 18, 24, 18, 19, 18, 24, 18}
},
{
{22, 21, 28, 21, 22, 21, 28, 21},
{21, 19, 26, 19, 21, 19, 26, 19},
{28, 26, 35, 26, 28, 26, 35, 26},
{21, 19, 26, 19, 21, 19, 26, 19},
{22, 21, 28, 21, 22, 21, 28, 21},
{21, 19, 26, 19, 21, 19, 26, 19},
{28, 26, 35, 26, 28, 26, 35, 26},
{21, 19, 26, 19, 21, 19, 26, 19}
},
{
{26, 24, 33, 24, 26, 24, 33, 24},
{24, 23, 31, 23, 24, 23, 31, 23},
{33, 31, 42, 31, 33, 31, 42, 31},
{24, 23, 31, 23, 24, 23, 31, 23},
{26, 24, 33, 24, 26, 24, 33, 24},
{24, 23, 31, 23, 24, 23, 31, 23},
{33, 31, 42, 31, 33, 31, 42, 31},
{24, 23, 31, 23, 24, 23, 31, 23}
},
{
{28, 26, 35, 26, 28, 26, 35, 26},
{26, 25, 33, 25, 26, 25, 33, 25},
{35, 33, 45, 33, 35, 33, 45, 33},
{26, 25, 33, 25, 26, 25, 33, 25},
{28, 26, 35, 26, 28, 26, 35, 26},
{26, 25, 33, 25, 26, 25, 33, 25},
{35, 33, 45, 33, 35, 33, 45, 33},
{26, 25, 33, 25, 26, 25, 33, 25}
},
{
{32, 30, 40, 30, 32, 30, 40, 30},
{30, 28, 38, 28, 30, 28, 38, 28},
{40, 38, 51, 38, 40, 38, 51, 38},
{30, 28, 38, 28, 30, 28, 38, 28},
{32, 30, 40, 30, 32, 30, 40, 30},
{30, 28, 38, 28, 30, 28, 38, 28},
{40, 38, 51, 38, 40, 38, 51, 38},
{30, 28, 38, 28, 30, 28, 38, 28}
},
{
{36, 34, 46, 34, 36, 34, 46, 34},
{34, 32, 43, 32, 34, 32, 43, 32},
{46, 43, 58, 43, 46, 43, 58, 43},
{34, 32, 43, 32, 34, 32, 43, 32},
{36, 34, 46, 34, 36, 34, 46, 34},
{34, 32, 43, 32, 34, 32, 43, 32},
{46, 43, 58, 43, 46, 43, 58, 43},
{34, 32, 43, 32, 34, 32, 43, 32}
}
};
// Scan index into a 30-element cache (mv or uiRefIndex), one entry per 4x4
// block. Each row of four entries covers one 2x2 group of blocks.
// Every value equals column + row * 6 with column and row in 1..4.
// NOTE(review): a 6-wide, 5-row cache layout is inferred from those values —
// confirm against the cache definition.
const uint8_t g_kuiCache30ScanIdx[16] = {
   7,  8, 13, 14,   // 1+1*6, 2+1*6, 1+2*6, 2+2*6
   9, 10, 15, 16,   // 3+1*6, 4+1*6, 3+2*6, 4+2*6
  19, 20, 25, 26,   // 1+3*6, 2+3*6, 1+4*6, 2+4*6
  21, 22, 27, 28    // 3+3*6, 4+3*6, 3+4*6, 4+4*6
};
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Declared extern in wels_common_defs.h.
// 52-entry lookup table, laid out ten entries per row with the index range in
// front. The table is the identity up to index 29; from index 30 on the values
// grow more slowly and end at 39.
const uint8_t g_kuiChromaQpTable[52] = {
  /*  0.. 9 */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
  /* 10..19 */ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
  /* 20..29 */ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
  /* 30..39 */ 29, 30, 31, 32, 32, 33, 34, 34, 35, 35,
  /* 40..49 */ 36, 36, 37, 37, 37, 38, 38, 38, 39, 39,
  /* 50..51 */ 39, 39
};
/*
* vcl type map for given NAL unit type and corresponding H264 type (0: AVC; 1: SVC).
*
* Indexed as g_keTypeMap[nal_unit_type][h264_type]; one row per NAL unit type 0..31.
* The two columns differ only for types 2..4 (VCL for AVC, NOT_APP for SVC) and
* type 20 (NON_VCL for AVC, VCL for SVC).
*/
const EVclType g_keTypeMap[32][2] = {
{ NON_VCL, NON_VCL }, // 0: NAL_UNIT_UNSPEC_0
{ VCL, VCL, }, // 1: NAL_UNIT_CODED_SLICE
{ VCL, NOT_APP }, // 2: NAL_UNIT_CODED_SLICE_DPA
{ VCL, NOT_APP }, // 3: NAL_UNIT_CODED_SLICE_DPB
{ VCL, NOT_APP }, // 4: NAL_UNIT_CODED_SLICE_DPC
{ VCL, VCL }, // 5: NAL_UNIT_CODED_SLICE_IDR
{ NON_VCL, NON_VCL }, // 6: NAL_UNIT_SEI
{ NON_VCL, NON_VCL }, // 7: NAL_UNIT_SPS
{ NON_VCL, NON_VCL }, // 8: NAL_UNIT_PPS
{ NON_VCL, NON_VCL }, // 9: NAL_UNIT_AU_DELIMITER
{ NON_VCL, NON_VCL }, // 10: NAL_UNIT_END_OF_SEQ
{ NON_VCL, NON_VCL }, // 11: NAL_UNIT_END_OF_STR
{ NON_VCL, NON_VCL }, // 12: NAL_UNIT_FILLER_DATA
{ NON_VCL, NON_VCL }, // 13: NAL_UNIT_SPS_EXT
{ NON_VCL, NON_VCL }, // 14: NAL_UNIT_PREFIX, NEED associate succeeded NAL to make a VCL
{ NON_VCL, NON_VCL }, // 15: NAL_UNIT_SUBSET_SPS
{ NON_VCL, NON_VCL }, // 16: NAL_UNIT_DEPTH_PARAM
{ NON_VCL, NON_VCL }, // 17: NAL_UNIT_RESV_17
{ NON_VCL, NON_VCL }, // 18: NAL_UNIT_RESV_18
{ NON_VCL, NON_VCL }, // 19: NAL_UNIT_AUX_CODED_SLICE
{ NON_VCL, VCL }, // 20: NAL_UNIT_CODED_SLICE_EXT
{ NON_VCL, NON_VCL }, // 21: NAL_UNIT_MVC_SLICE_EXT
{ NON_VCL, NON_VCL }, // 22: NAL_UNIT_RESV_22
{ NON_VCL, NON_VCL }, // 23: NAL_UNIT_RESV_23
{ NON_VCL, NON_VCL }, // 24: NAL_UNIT_UNSPEC_24
{ NON_VCL, NON_VCL }, // 25: NAL_UNIT_UNSPEC_25
{ NON_VCL, NON_VCL }, // 26: NAL_UNIT_UNSPEC_26
{ NON_VCL, NON_VCL }, // 27: NAL_UNIT_UNSPEC_27
{ NON_VCL, NON_VCL }, // 28: NAL_UNIT_UNSPEC_28
{ NON_VCL, NON_VCL }, // 29: NAL_UNIT_UNSPEC_29
{ NON_VCL, NON_VCL }, // 30: NAL_UNIT_UNSPEC_30
{ NON_VCL, NON_VCL } // 31: NAL_UNIT_UNSPEC_31
};
// Default 4x4 scaling-list matrices, two lists of 16 entries shown here as
// 4x4 grids.
// NOTE(review): the two lists are presumably the intra ([0]) and inter ([1])
// defaults — confirm against the code that selects between them.
const uint8_t g_kuiDequantScaling4x4Default[2][16] = {
  {
     6, 13, 20, 28,
    13, 20, 28, 32,
    20, 28, 32, 37,
    28, 32, 37, 42
  },
  {
    10, 14, 20, 24,
    14, 20, 24, 27,
    20, 24, 27, 30,
    24, 27, 30, 34
  }
};
// Default 8x8 scaling-list matrices, two lists of 64 entries shown here as
// 8x8 grids (eight entries per row).
// NOTE(review): the two lists are presumably the intra ([0]) and inter ([1])
// defaults — confirm against the code that selects between them.
const uint8_t g_kuiDequantScaling8x8Default[2][64] = {
  {
     6, 10, 13, 16, 18, 23, 25, 27,
    10, 11, 16, 18, 23, 25, 27, 29,
    13, 16, 18, 23, 25, 27, 29, 31,
    16, 18, 23, 25, 27, 29, 31, 33,
    18, 23, 25, 27, 29, 31, 33, 36,
    23, 25, 27, 29, 31, 33, 36, 38,
    25, 27, 29, 31, 33, 36, 38, 40,
    27, 29, 31, 33, 36, 38, 40, 42
  },
  {
     9, 13, 15, 17, 19, 21, 22, 24,
    13, 13, 17, 19, 21, 22, 24, 25,
    15, 17, 19, 21, 22, 24, 25, 27,
    17, 19, 21, 22, 24, 25, 27, 28,
    19, 21, 22, 24, 25, 27, 28, 30,
    21, 22, 24, 25, 27, 28, 30, 32,
    22, 24, 25, 27, 28, 30, 32, 33,
    24, 25, 27, 28, 30, 32, 33, 35
  }
};
// Dequantization coefficients, one row of 8 values per QP (0..51).
// Each row has the shape { a, b, a, b, b, c, b, c }, and every row is exactly
// twice the row six QP steps before it.
// NOTE(review): the trailing 16 is presumably the requested byte alignment -
// confirm against the ALIGNED_DECLARE definition.
ALIGNED_DECLARE (const uint16_t, g_kuiDequantCoeff[52][8], 16) = {
  /* QP  0 */ {   10,   13,   10,   13,   13,   16,   13,   16 },
  /* QP  1 */ {   11,   14,   11,   14,   14,   18,   14,   18 },
  /* QP  2 */ {   13,   16,   13,   16,   16,   20,   16,   20 },
  /* QP  3 */ {   14,   18,   14,   18,   18,   23,   18,   23 },
  /* QP  4 */ {   16,   20,   16,   20,   20,   25,   20,   25 },
  /* QP  5 */ {   18,   23,   18,   23,   23,   29,   23,   29 },
  /* QP  6 */ {   20,   26,   20,   26,   26,   32,   26,   32 },
  /* QP  7 */ {   22,   28,   22,   28,   28,   36,   28,   36 },
  /* QP  8 */ {   26,   32,   26,   32,   32,   40,   32,   40 },
  /* QP  9 */ {   28,   36,   28,   36,   36,   46,   36,   46 },
  /* QP 10 */ {   32,   40,   32,   40,   40,   50,   40,   50 },
  /* QP 11 */ {   36,   46,   36,   46,   46,   58,   46,   58 },
  /* QP 12 */ {   40,   52,   40,   52,   52,   64,   52,   64 },
  /* QP 13 */ {   44,   56,   44,   56,   56,   72,   56,   72 },
  /* QP 14 */ {   52,   64,   52,   64,   64,   80,   64,   80 },
  /* QP 15 */ {   56,   72,   56,   72,   72,   92,   72,   92 },
  /* QP 16 */ {   64,   80,   64,   80,   80,  100,   80,  100 },
  /* QP 17 */ {   72,   92,   72,   92,   92,  116,   92,  116 },
  /* QP 18 */ {   80,  104,   80,  104,  104,  128,  104,  128 },
  /* QP 19 */ {   88,  112,   88,  112,  112,  144,  112,  144 },
  /* QP 20 */ {  104,  128,  104,  128,  128,  160,  128,  160 },
  /* QP 21 */ {  112,  144,  112,  144,  144,  184,  144,  184 },
  /* QP 22 */ {  128,  160,  128,  160,  160,  200,  160,  200 },
  /* QP 23 */ {  144,  184,  144,  184,  184,  232,  184,  232 },
  /* QP 24 */ {  160,  208,  160,  208,  208,  256,  208,  256 },
  /* QP 25 */ {  176,  224,  176,  224,  224,  288,  224,  288 },
  /* QP 26 */ {  208,  256,  208,  256,  256,  320,  256,  320 },
  /* QP 27 */ {  224,  288,  224,  288,  288,  368,  288,  368 },
  /* QP 28 */ {  256,  320,  256,  320,  320,  400,  320,  400 },
  /* QP 29 */ {  288,  368,  288,  368,  368,  464,  368,  464 },
  /* QP 30 */ {  320,  416,  320,  416,  416,  512,  416,  512 },
  /* QP 31 */ {  352,  448,  352,  448,  448,  576,  448,  576 },
  /* QP 32 */ {  416,  512,  416,  512,  512,  640,  512,  640 },
  /* QP 33 */ {  448,  576,  448,  576,  576,  736,  576,  736 },
  /* QP 34 */ {  512,  640,  512,  640,  640,  800,  640,  800 },
  /* QP 35 */ {  576,  736,  576,  736,  736,  928,  736,  928 },
  /* QP 36 */ {  640,  832,  640,  832,  832, 1024,  832, 1024 },
  /* QP 37 */ {  704,  896,  704,  896,  896, 1152,  896, 1152 },
  /* QP 38 */ {  832, 1024,  832, 1024, 1024, 1280, 1024, 1280 },
  /* QP 39 */ {  896, 1152,  896, 1152, 1152, 1472, 1152, 1472 },
  /* QP 40 */ { 1024, 1280, 1024, 1280, 1280, 1600, 1280, 1600 },
  /* QP 41 */ { 1152, 1472, 1152, 1472, 1472, 1856, 1472, 1856 },
  /* QP 42 */ { 1280, 1664, 1280, 1664, 1664, 2048, 1664, 2048 },
  /* QP 43 */ { 1408, 1792, 1408, 1792, 1792, 2304, 1792, 2304 },
  /* QP 44 */ { 1664, 2048, 1664, 2048, 2048, 2560, 2048, 2560 },
  /* QP 45 */ { 1792, 2304, 1792, 2304, 2304, 2944, 2304, 2944 },
  /* QP 46 */ { 2048, 2560, 2048, 2560, 2560, 3200, 2560, 3200 },
  /* QP 47 */ { 2304, 2944, 2304, 2944, 2944, 3712, 2944, 3712 },
  /* QP 48 */ { 2560, 3328, 2560, 3328, 3328, 4096, 3328, 4096 },
  /* QP 49 */ { 2816, 3584, 2816, 3584, 3584, 4608, 3584, 4608 },
  /* QP 50 */ { 3328, 4096, 3328, 4096, 4096, 5120, 4096, 5120 },
  /* QP 51 */ { 3584, 4608, 3584, 4608, 4608, 5888, 4608, 5888 }
};
// Dequantization coefficients for 8x8 blocks: one row of 64 values per QP (0..51).
// Unlike g_kuiDequantCoeff, the rows here do NOT grow with QP: the row for QP q is
// identical to the row for QP (q % 6), so only six distinct rows exist. Every entry
// is a multiple of 16 (e.g. 320 = 20 * 16).
// NOTE(review): the QP / 6 scaling therefore appears to be applied by the code that
// reads this table rather than being baked into it - confirm at the call sites.
// NOTE(review): the trailing 16 is presumably the requested byte alignment - confirm
// against the ALIGNED_DECLARE definition.
ALIGNED_DECLARE (const uint16_t, g_kuiDequantCoeff8x8[52][64], 16) = {
/* QP == 0 */
{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 },
/* QP == 1 */
{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 },
/* QP == 2 */
{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 },
/* QP == 3 */
{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 },
/* QP == 4 */
{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 },
/* QP == 5 */
{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 },
/* QP == 6 */
{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 },
/* QP == 7 */
{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 },
/* QP == 8 */
{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 },
/* QP == 9 */
{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 },
/* QP == 10 */
{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 },
/* QP == 11 */
{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 },
/* QP == 12 */
{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 },
/* QP == 13 */
{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 },
/* QP == 14 */
{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 },
/* QP == 15 */
{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 },
/* QP == 16 */
{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 },
/* QP == 17 */
{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 },
/* QP == 18 */
{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 },
/* QP == 19 */
{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 },
/* QP == 20 */
{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 },
/* QP == 21 */
{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 },
/* QP == 22 */
{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 },
/* QP == 23 */
{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 },
/* QP == 24 */
{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 },
/* QP == 25 */
{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 },
/* QP == 26 */
{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 },
/* QP == 27 */
{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 },
/* QP == 28 */
{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 },
/* QP == 29 */
{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 },
/* QP == 30 */
{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 },
/* QP == 31 */
{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 },
/* QP == 32 */
{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 },
/* QP == 33 */
{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 },
/* QP == 34 */
{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 },
/* QP == 35 */
{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 },
/* QP == 36 */
{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 },
/* QP == 37 */
{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 },
/* QP == 38 */
{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 },
/* QP == 39 */
{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 },
/* QP == 40 */
{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 },
/* QP == 41 */
{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 },
/* QP == 42 */
{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 },
/* QP == 43 */
{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 },
/* QP == 44 */
{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 },
/* QP == 45 */
{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 },
/* QP == 46 */
{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 },
/* QP == 47 */
{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 },
/* QP == 48 */
{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 },
/* QP == 49 */
{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 },
/* QP == 50 */
{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 },
/* QP == 51 */
{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 },
};
// Level limits (Table A-1), one row per level from 1 up to 5.2.
// NOTE(review): the column legend below is inferred from Table A-1 of the
// H.264 specification, with the two motion-vector bounds apparently expressed
// in quarter-sample units - confirm the field order against the SLevelLimits
// declaration.
const SLevelLimits g_ksLevelLimits[LEVEL_NUMBER] = {
  //  level id     MaxMBPS   MaxFS  MaxDpbMbs   MaxBR   MaxCPB  MinVmv  MaxVmv MinCR MaxMvsPer2Mb
  { LEVEL_1_0,      1485,     99,      396,     64,     175,   -256,   255,  2, 0x7fff },  /* level 1   */
  { LEVEL_1_B,      1485,     99,      396,    128,     350,   -256,   255,  2, 0x7fff },  /* level 1.b */
  { LEVEL_1_1,      3000,    396,      900,    192,     500,   -512,   511,  2, 0x7fff },  /* level 1.1 */
  { LEVEL_1_2,      6000,    396,     2376,    384,    1000,   -512,   511,  2, 0x7fff },  /* level 1.2 */
  { LEVEL_1_3,     11880,    396,     2376,    768,    2000,   -512,   511,  2, 0x7fff },  /* level 1.3 */
  { LEVEL_2_0,     11880,    396,     2376,   2000,    2000,   -512,   511,  2, 0x7fff },  /* level 2   */
  { LEVEL_2_1,     19800,    792,     4752,   4000,    4000,  -1024,  1023,  2, 0x7fff },  /* level 2.1 */
  { LEVEL_2_2,     20250,   1620,     8100,   4000,    4000,  -1024,  1023,  2, 0x7fff },  /* level 2.2 */
  { LEVEL_3_0,     40500,   1620,     8100,  10000,   10000,  -1024,  1023,  2, 32 },      /* level 3   */
  { LEVEL_3_1,    108000,   3600,    18000,  14000,   14000,  -2048,  2047,  4, 16 },      /* level 3.1 */
  { LEVEL_3_2,    216000,   5120,    20480,  20000,   20000,  -2048,  2047,  4, 16 },      /* level 3.2 */
  { LEVEL_4_0,    245760,   8192,    32768,  20000,   25000,  -2048,  2047,  4, 16 },      /* level 4   */
  { LEVEL_4_1,    245760,   8192,    32768,  50000,   62500,  -2048,  2047,  2, 16 },      /* level 4.1 */
  { LEVEL_4_2,    522240,   8704,    34816,  50000,   62500,  -2048,  2047,  2, 16 },      /* level 4.2 */
  { LEVEL_5_0,    589824,  22080,   110400, 135000,  135000,  -2048,  2047,  2, 16 },      /* level 5   */
  { LEVEL_5_1,    983040,  36864,   184320, 240000,  240000,  -2048,  2047,  2, 16 },      /* level 5.1 */
  { LEVEL_5_2,   2073600,  36864,   184320, 240000,  240000,  -2048,  2047,  2, 16 }       /* level 5.2 */
};
// Numeric level value for each of the LEVEL_NUMBER supported levels.
// NOTE(review): the order appears to parallel g_ksLevelLimits (1, 1b, 1.1, ...,
// 5.2), which would make the second entry, 9, the value used for level 1b -
// confirm against the callers.
const uint32_t g_kuiLevelMaps[LEVEL_NUMBER] = {
  10,  9, 11, 12, 13,   // levels 1.x
  20, 21, 22,           // levels 2.x
  30, 31, 32,           // levels 3.x
  40, 41, 42,           // levels 4.x
  50, 51, 52            // levels 5.x
};
//for cabac
/* this table is from Table9-12 to Table 9-24 */
const int8_t g_kiCabacGlobalContextIdx[WELS_CONTEXT_COUNT][4][2] = {
//0-10 Table 9-12
{{20, -15}, {20, -15}, {20, -15}, {20, -15}},
{{2, 54}, {2, 54}, {2, 54}, {2, 54}},
{{3, 74}, {3, 74}, {3, 74}, {3, 74}},
{{20, -15}, {20, -15}, {20, -15}, {20, -15}},
{{2, 54}, {2, 54}, {2, 54}, {2, 54}},
{{3, 74}, {3, 74}, {3, 74}, {3, 74}},
{{ -28, 127}, { -28, 127}, { -28, 127}, { -28, 127}},
{{ -23, 104}, { -23, 104}, { -23, 104}, { -23, 104}},
{{ -6, 53}, { -6, 53}, { -6, 53}, { -6, 53}},
{{ -1, 54}, { -1, 54}, { -1, 54}, { -1, 54}},
{{7, 51}, {7, 51}, {7, 51}, {7, 51}},
//11-23 Table 9-13
{{CTX_NA, CTX_NA}, {23, 33}, {22, 25}, {29, 16}},
{{CTX_NA, CTX_NA}, {23, 2}, {34, 0}, {25, 0}},
{{CTX_NA, CTX_NA}, {21, 0}, {16, 0}, {14, 0}},
{{CTX_NA, CTX_NA}, {1, 9}, { -2, 9}, { -10, 51}},
{{CTX_NA, CTX_NA}, {0, 49}, {4, 41}, { -3, 62}},
{{CTX_NA, CTX_NA}, { -37, 118}, { -29, 118}, { -27, 99}},
{{CTX_NA, CTX_NA}, {5, 57}, {2, 65}, {26, 16}},
{{CTX_NA, CTX_NA}, { -13, 78}, { -6, 71}, { -4, 85}},
{{CTX_NA, CTX_NA}, { -11, 65}, { -13, 79}, { -24, 102}},
{{CTX_NA, CTX_NA}, {1, 62}, {5, 52}, {5, 57}},
{{CTX_NA, CTX_NA}, {12, 49}, {9, 50}, {6, 57}},
{{CTX_NA, CTX_NA}, { -4, 73}, { -3, 70}, { -17, 73}},
{{CTX_NA, CTX_NA}, {17, 50}, {10, 54}, {14, 57}},
//24-39 Table9-14
{{CTX_NA, CTX_NA}, {18, 64}, {26, 34}, {20, 40}},
{{CTX_NA, CTX_NA}, {9, 43}, {19, 22}, {20, 10}},
{{CTX_NA, CTX_NA}, {29, 0}, {40, 0}, {29, 0}},
{{CTX_NA, CTX_NA}, {26, 67}, {57, 2}, {54, 0}},
{{CTX_NA, CTX_NA}, {16, 90}, {41, 36}, {37, 42}},
{{CTX_NA, CTX_NA}, {9, 104}, {26, 69}, {12, 97}},
{{CTX_NA, CTX_NA}, { -46, 127}, { -45, 127}, { -32, 127}},
{{CTX_NA, CTX_NA}, { -20, 104}, { -15, 101}, { -22, 117}},
{{CTX_NA, CTX_NA}, {1, 67}, { -4, 76}, { -2, 74}},
{{CTX_NA, CTX_NA}, { -13, 78}, { -6, 71}, { -4, 85}},
{{CTX_NA, CTX_NA}, { -11, 65}, { -13, 79}, { -24, 102}},
{{CTX_NA, CTX_NA}, {1, 62}, {5, 52}, {5, 57}},
{{CTX_NA, CTX_NA}, { -6, 86}, {6, 69}, { -6, 93}},
{{CTX_NA, CTX_NA}, { -17, 95}, { -13, 90}, { -14, 88}},
{{CTX_NA, CTX_NA}, { -6, 61}, {0, 52}, { -6, 44}},
{{CTX_NA, CTX_NA}, {9, 45}, {8, 43}, {4, 55}},
//40-53 Table 9-15
{{CTX_NA, CTX_NA}, { -3, 69}, { -2, 69}, { -11, 89}},
{{CTX_NA, CTX_NA}, { -6, 81}, { -5, 82}, { -15, 103}},
{{CTX_NA, CTX_NA}, { -11, 96}, { -10, 96}, { -21, 116}},
{{CTX_NA, CTX_NA}, {6, 55}, {2, 59}, {19, 57}},
{{CTX_NA, CTX_NA}, {7, 67}, {2, 75}, {20, 58}},
{{CTX_NA, CTX_NA}, { -5, 86}, { -3, 87}, {4, 84}},
{{CTX_NA, CTX_NA}, {2, 88}, { -3, 100}, {6, 96}},
{{CTX_NA, CTX_NA}, {0, 58}, {1, 56}, {1, 63}},
{{CTX_NA, CTX_NA}, { -3, 76}, { -3, 74}, { -5, 85}},
{{CTX_NA, CTX_NA}, { -10, 94}, { -6, 85}, { -13, 106}},
{{CTX_NA, CTX_NA}, {5, 54}, {0, 59}, {5, 63}},
{{CTX_NA, CTX_NA}, {4, 69}, { -3, 81}, {6, 75}},
{{CTX_NA, CTX_NA}, { -3, 81}, { -7, 86}, { -3, 90}},
{{CTX_NA, CTX_NA}, {0, 88}, { -5, 95}, { -1, 101}},
//54-59 Table 9-16
{{CTX_NA, CTX_NA}, { -7, 67}, { -1, 66}, {3, 55}},
{{CTX_NA, CTX_NA}, { -5, 74}, { -1, 77}, { -4, 79}},
{{CTX_NA, CTX_NA}, { -4, 74}, {1, 70}, { -2, 75}},
{{CTX_NA, CTX_NA}, { -5, 80}, { -2, 86}, { -12, 97}},
{{CTX_NA, CTX_NA}, { -7, 72}, { -5, 72}, { -7, 50}},
{{CTX_NA, CTX_NA}, {1, 58}, {0, 61}, {1, 60}},
//60-69 Table 9-17
{{0, 41}, {0, 41}, {0, 41}, {0, 41}},
{{0, 63}, {0, 63}, {0, 63}, {0, 63}},
{{0, 63}, {0, 63}, {0, 63}, {0, 63}},
{{0, 63}, {0, 63}, {0, 63}, {0, 63}},
{{ -9, 83}, { -9, 83}, { -9, 83}, { -9, 83}},
{{4, 86}, {4, 86}, {4, 86}, {4, 86}},
{{0, 97}, {0, 97}, {0, 97}, {0, 97}},
{{ -7, 72}, { -7, 72}, { -7, 72}, { -7, 72}},
{{13, 41}, {13, 41}, {13, 41}, {13, 41}},
{{3, 62}, {3, 62}, {3, 62}, {3, 62}},
//70-104 Table 9-18
{{0, 11}, {0, 45}, {13, 15}, {7, 34}},
{{1, 55}, { -4, 78}, {7, 51}, { -9, 88}},
{{0, 69}, { -3, 96}, {2, 80}, { -20, 127}},
{{ -17, 127}, { -27, 126}, { -39, 127}, { -36, 127}},
{{ -13, 102}, { -28, 98}, { -18, 91}, { -17, 91}},
{{0, 82}, { -25, 101}, { -17, 96}, { -14, 95}},
{{ -7, 74}, { -23, 67}, { -26, 81}, { -25, 84}},
{{ -21, 107}, { -28, 82}, { -35, 98}, { -25, 86}},
{{ -27, 127}, { -20, 94}, { -24, 102}, { -12, 89}},
{{ -31, 127}, { -16, 83}, { -23, 97}, { -17, 91}},
{{ -24, 127}, { -22, 110}, { -27, 119}, { -31, 127}},
{{ -18, 95}, { -21, 91}, { -24, 99}, { -14, 76}},
{{ -27, 127}, { -18, 102}, { -21, 110}, { -18, 103}},
{{ -21, 114}, { -13, 93}, { -18, 102}, { -13, 90}},
{{ -30, 127}, { -29, 127}, { -36, 127}, { -37, 127}},
{{ -17, 123}, { -7, 92}, {0, 80}, {11, 80}},
{{ -12, 115}, { -5, 89}, { -5, 89}, {5, 76}},
{{ -16, 122}, { -7, 96}, { -7, 94}, {2, 84}},
{{ -11, 115}, { -13, 108}, { -4, 92}, {5, 78}},
{{ -12, 63}, { -3, 46}, {0, 39}, { -6, 55}},
{{ -2, 68}, { -1, 65}, {0, 65}, {4, 61}},
{{ -15, 84}, { -1, 57}, { -15, 84}, { -14, 83}},
{{ -13, 104}, { -9, 93}, { -35, 127}, { -37, 127}},
{{ -3, 70}, { -3, 74}, { -2, 73}, { -5, 79}},
{{ -8, 93}, { -9, 92}, { -12, 104}, { -11, 104}},
{{ -10, 90}, { -8, 87}, { -9, 91}, { -11, 91}},
{{ -30, 127}, { -23, 126}, { -31, 127}, { -30, 127}},
{{ -1, 74}, {5, 54}, {3, 55}, {0, 65}},
{{ -6, 97}, {6, 60}, {7, 56}, { -2, 79}},
{{ -7, 91}, {6, 59}, {7, 55}, {0, 72}},
{{ -20, 127}, {6, 69}, {8, 61}, { -4, 92}},
{{ -4, 56}, { -1, 48}, { -3, 53}, { -6, 56}},
{{ -5, 82}, {0, 68}, {0, 68}, {3, 68}},
{{ -7, 76}, { -4, 69}, { -7, 74}, { -8, 71}},
{{ -22, 125}, { -8, 88}, { -9, 88}, { -13, 98}},
//105-165 Table 9-19
{{ -7, 93}, { -2, 85}, { -13, 103}, { -4, 86}},
{{ -11, 87}, { -6, 78}, { -13, 91}, { -12, 88}},
{{ -3, 77}, { -1, 75}, { -9, 89}, { -5, 82}},
{{ -5, 71}, { -7, 77}, { -14, 92}, { -3, 72}},
{{ -4, 63}, {2, 54}, { -8, 76}, { -4, 67}},
{{ -4, 68}, {5, 50}, { -12, 87}, { -8, 72}},
{{ -12, 84}, { -3, 68}, { -23, 110}, { -16, 89}},
{{ -7, 62}, {1, 50}, { -24, 105}, { -9, 69}},
{{ -7, 65}, {6, 42}, { -10, 78}, { -1, 59}},
{{8, 61}, { -4, 81}, { -20, 112}, {5, 66}},
{{5, 56}, {1, 63}, { -17, 99}, {4, 57}},
{{ -2, 66}, { -4, 70}, { -78, 127}, { -4, 71}},
{{1, 64}, {0, 67}, { -70, 127}, { -2, 71}},
{{0, 61}, {2, 57}, { -50, 127}, {2, 58}},
{{ -2, 78}, { -2, 76}, { -46, 127}, { -1, 74}},
{{1, 50}, {11, 35}, { -4, 66}, { -4, 44}},
{{7, 52}, {4, 64}, { -5, 78}, { -1, 69}},
{{10, 35}, {1, 61}, { -4, 71}, {0, 62}},
{{0, 44}, {11, 35}, { -8, 72}, { -7, 51}},
{{11, 38}, {18, 25}, {2, 59}, { -4, 47}},
{{1, 45}, {12, 24}, { -1, 55}, { -6, 42}},
{{0, 46}, {13, 29}, { -7, 70}, { -3, 41}},
{{5, 44}, {13, 36}, { -6, 75}, { -6, 53}},
{{31, 17}, { -10, 93}, { -8, 89}, {8, 76}},
{{1, 51}, { -7, 73}, { -34, 119}, { -9, 78}},
{{7, 50}, { -2, 73}, { -3, 75}, { -11, 83}},
{{28, 19}, {13, 46}, {32, 20}, {9, 52}},
{{16, 33}, {9, 49}, {30, 22}, {0, 67}},
{{14, 62}, { -7, 100}, { -44, 127}, { -5, 90}},
{{ -13, 108}, {9, 53}, {0, 54}, {1, 67}},
{{ -15, 100}, {2, 53}, { -5, 61}, { -15, 72}},
{{ -13, 101}, {5, 53}, {0, 58}, { -5, 75}},
{{ -13, 91}, { -2, 61}, { -1, 60}, { -8, 80}},
{{ -12, 94}, {0, 56}, { -3, 61}, { -21, 83}},
{{ -10, 88}, {0, 56}, { -8, 67}, { -21, 64}},
{{ -16, 84}, { -13, 63}, { -25, 84}, { -13, 31}},
{{ -10, 86}, { -5, 60}, { -14, 74}, { -25, 64}},
{{ -7, 83}, { -1, 62}, { -5, 65}, { -29, 94}},
{{ -13, 87}, {4, 57}, {5, 52}, {9, 75}},
{{ -19, 94}, { -6, 69}, {2, 57}, {17, 63}},
{{1, 70}, {4, 57}, {0, 61}, { -8, 74}},
{{0, 72}, {14, 39}, { -9, 69}, { -5, 35}},
{{ -5, 74}, {4, 51}, { -11, 70}, { -2, 27}},
{{18, 59}, {13, 68}, {18, 55}, {13, 91}},
{{ -8, 102}, {3, 64}, { -4, 71}, {3, 65}},
{{ -15, 100}, {1, 61}, {0, 58}, { -7, 69}},
{{0, 95}, {9, 63}, {7, 61}, {8, 77}},
{{ -4, 75}, {7, 50}, {9, 41}, { -10, 66}},
{{2, 72}, {16, 39}, {18, 25}, {3, 62}},
{{ -11, 75}, {5, 44}, {9, 32}, { -3, 68}},
{{ -3, 71}, {4, 52}, {5, 43}, { -20, 81}},
{{15, 46}, {11, 48}, {9, 47}, {0, 30}},
{{ -13, 69}, { -5, 60}, {0, 44}, {1, 7}},
{{0, 62}, { -1, 59}, {0, 51}, { -3, 23}},
{{0, 65}, {0, 59}, {2, 46}, { -21, 74}},
{{21, 37}, {22, 33}, {19, 38}, {16, 66}},
{{ -15, 72}, {5, 44}, { -4, 66}, { -23, 124}},
{{9, 57}, {14, 43}, {15, 38}, {17, 37}},
{{16, 54}, { -1, 78}, {12, 42}, {44, -18}},
{{0, 62}, {0, 60}, {9, 34}, {50, -34}},
{{12, 72}, {9, 69}, {0, 89}, { -22, 127}},
//166-226 Table 9-20
{{24, 0}, {11, 28}, {4, 45}, {4, 39}},
{{15, 9}, {2, 40}, {10, 28}, {0, 42}},
{{8, 25}, {3, 44}, {10, 31}, {7, 34}},
{{13, 18}, {0, 49}, {33, -11}, {11, 29}},
{{15, 9}, {0, 46}, {52, -43}, {8, 31}},
{{13, 19}, {2, 44}, {18, 15}, {6, 37}},
{{10, 37}, {2, 51}, {28, 0}, {7, 42}},
{{12, 18}, {0, 47}, {35, -22}, {3, 40}},
{{6, 29}, {4, 39}, {38, -25}, {8, 33}},
{{20, 33}, {2, 62}, {34, 0}, {13, 43}},
{{15, 30}, {6, 46}, {39, -18}, {13, 36}},
{{4, 45}, {0, 54}, {32, -12}, {4, 47}},
{{1, 58}, {3, 54}, {102, -94}, {3, 55}},
{{0, 62}, {2, 58}, {0, 0}, {2, 58}},
{{7, 61}, {4, 63}, {56, -15}, {6, 60}},
{{12, 38}, {6, 51}, {33, -4}, {8, 44}},
{{11, 45}, {6, 57}, {29, 10}, {11, 44}},
{{15, 39}, {7, 53}, {37, -5}, {14, 42}},
{{11, 42}, {6, 52}, {51, -29}, {7, 48}},
{{13, 44}, {6, 55}, {39, -9}, {4, 56}},
{{16, 45}, {11, 45}, {52, -34}, {4, 52}},
{{12, 41}, {14, 36}, {69, -58}, {13, 37}},
{{10, 49}, {8, 53}, {67, -63}, {9, 49}},
{{30, 34}, { -1, 82}, {44, -5}, {19, 58}},
{{18, 42}, {7, 55}, {32, 7}, {10, 48}},
{{10, 55}, { -3, 78}, {55, -29}, {12, 45}},
{{17, 51}, {15, 46}, {32, 1}, {0, 69}},
{{17, 46}, {22, 31}, {0, 0}, {20, 33}},
{{0, 89}, { -1, 84}, {27, 36}, {8, 63}},
{{26, -19}, {25, 7}, {33, -25}, {35, -18}},
{{22, -17}, {30, -7}, {34, -30}, {33, -25}},
{{26, -17}, {28, 3}, {36, -28}, {28, -3}},
{{30, -25}, {28, 4}, {38, -28}, {24, 10}},
{{28, -20}, {32, 0}, {38, -27}, {27, 0}},
{{33, -23}, {34, -1}, {34, -18}, {34, -14}},
{{37, -27}, {30, 6}, {35, -16}, {52, -44}},
{{33, -23}, {30, 6}, {34, -14}, {39, -24}},
{{40, -28}, {32, 9}, {32, -8}, {19, 17}},
{{38, -17}, {31, 19}, {37, -6}, {31, 25}},
{{33, -11}, {26, 27}, {35, 0}, {36, 29}},
{{40, -15}, {26, 30}, {30, 10}, {24, 33}},
{{41, -6}, {37, 20}, {28, 18}, {34, 15}},
{{38, 1}, {28, 34}, {26, 25}, {30, 20}},
{{41, 17}, {17, 70}, {29, 41}, {22, 73}},
{{30, -6}, {1, 67}, {0, 75}, {20, 34}},
{{27, 3}, {5, 59}, {2, 72}, {19, 31}},
{{26, 22}, {9, 67}, {8, 77}, {27, 44}},
{{37, -16}, {16, 30}, {14, 35}, {19, 16}},
{{35, -4}, {18, 32}, {18, 31}, {15, 36}},
{{38, -8}, {18, 35}, {17, 35}, {15, 36}},
{{38, -3}, {22, 29}, {21, 30}, {21, 28}},
{{37, 3}, {24, 31}, {17, 45}, {25, 21}},
{{38, 5}, {23, 38}, {20, 42}, {30, 20}},
{{42, 0}, {18, 43}, {18, 45}, {31, 12}},
{{35, 16}, {20, 41}, {27, 26}, {27, 16}},
{{39, 22}, {11, 63}, {16, 54}, {24, 42}},
{{14, 48}, {9, 59}, {7, 66}, {0, 93}},
{{27, 37}, {9, 64}, {16, 56}, {14, 56}},
{{21, 60}, { -1, 94}, {11, 73}, {15, 57}},
{{12, 68}, { -2, 89}, {10, 67}, {26, 38}},
{{2, 97}, { -9, 108}, { -10, 116}, { -24, 127}},
//227-275 Table 9-21
{{ -3, 71}, { -6, 76}, { -23, 112}, { -24, 115}},
{{ -6, 42}, { -2, 44}, { -15, 71}, { -22, 82}},
{{ -5, 50}, {0, 45}, { -7, 61}, { -9, 62}},
{{ -3, 54}, {0, 52}, {0, 53}, {0, 53}},
{{ -2, 62}, { -3, 64}, { -5, 66}, {0, 59}},
{{0, 58}, { -2, 59}, { -11, 77}, { -14, 85}},
{{1, 63}, { -4, 70}, { -9, 80}, { -13, 89}},
{{ -2, 72}, { -4, 75}, { -9, 84}, { -13, 94}},
{{ -1, 74}, { -8, 82}, { -10, 87}, { -11, 92}},
{{ -9, 91}, { -17, 102}, { -34, 127}, { -29, 127}},
{{ -5, 67}, { -9, 77}, { -21, 101}, { -21, 100}},
{{ -5, 27}, {3, 24}, { -3, 39}, { -14, 57}},
{{ -3, 39}, {0, 42}, { -5, 53}, { -12, 67}},
{{ -2, 44}, {0, 48}, { -7, 61}, { -11, 71}},
{{0, 46}, {0, 55}, { -11, 75}, { -10, 77}},
{{ -16, 64}, { -6, 59}, { -15, 77}, { -21, 85}},
{{ -8, 68}, { -7, 71}, { -17, 91}, { -16, 88}},
{{ -10, 78}, { -12, 83}, { -25, 107}, { -23, 104}},
{{ -6, 77}, { -11, 87}, { -25, 111}, { -15, 98}},
{{ -10, 86}, { -30, 119}, { -28, 122}, { -37, 127}},
{{ -12, 92}, {1, 58}, { -11, 76}, { -10, 82}},
{{ -15, 55}, { -3, 29}, { -10, 44}, { -8, 48}},
{{ -10, 60}, { -1, 36}, { -10, 52}, { -8, 61}},
{{ -6, 62}, {1, 38}, { -10, 57}, { -8, 66}},
{{ -4, 65}, {2, 43}, { -9, 58}, { -7, 70}},
{{ -12, 73}, { -6, 55}, { -16, 72}, { -14, 75}},
{{ -8, 76}, {0, 58}, { -7, 69}, { -10, 79}},
{{ -7, 80}, {0, 64}, { -4, 69}, { -9, 83}},
{{ -9, 88}, { -3, 74}, { -5, 74}, { -12, 92}},
{{ -17, 110}, { -10, 90}, { -9, 86}, { -18, 108}},
{{ -11, 97}, {0, 70}, {2, 66}, { -4, 79}},
{{ -20, 84}, { -4, 29}, { -9, 34}, { -22, 69}},
{{ -11, 79}, {5, 31}, {1, 32}, { -16, 75}},
{{ -6, 73}, {7, 42}, {11, 31}, { -2, 58}},
{{ -4, 74}, {1, 59}, {5, 52}, {1, 58}},
{{ -13, 86}, { -2, 58}, { -2, 55}, { -13, 78}},
{{ -13, 96}, { -3, 72}, { -2, 67}, { -9, 83}},
{{ -11, 97}, { -3, 81}, {0, 73}, { -4, 81}},
{{ -19, 117}, { -11, 97}, { -8, 89}, { -13, 99}},
{{ -8, 78}, {0, 58}, {3, 52}, { -13, 81}},
{{ -5, 33}, {8, 5}, {7, 4}, { -6, 38}},
{{ -4, 48}, {10, 14}, {10, 8}, { -13, 62}},
{{ -2, 53}, {14, 18}, {17, 8}, { -6, 58}},
{{ -3, 62}, {13, 27}, {16, 19}, { -2, 59}},
{{ -13, 71}, {2, 40}, {3, 37}, { -16, 73}},
{{ -10, 79}, {0, 58}, { -1, 61}, { -10, 76}},
{{ -12, 86}, { -3, 70}, { -5, 73}, { -13, 86}},
{{ -13, 90}, { -6, 79}, { -1, 70}, { -9, 83}},
{{ -14, 97}, { -8, 85}, { -4, 78}, { -10, 87}},
//276 no use
{{CTX_NA, CTX_NA}, {CTX_NA, CTX_NA}, {CTX_NA, CTX_NA}, {CTX_NA, CTX_NA}},
//277-337 Table 9-22
{{ -6, 93}, { -13, 106}, { -21, 126}, { -22, 127}},
{{ -6, 84}, { -16, 106}, { -23, 124}, { -25, 127}},
{{ -8, 79}, { -10, 87}, { -20, 110}, { -25, 120}},
{{0, 66}, { -21, 114}, { -26, 126}, { -27, 127}},
{{ -1, 71}, { -18, 110}, { -25, 124}, { -19, 114}},
{{0, 62}, { -14, 98}, { -17, 105}, { -23, 117}},
{{ -2, 60}, { -22, 110}, { -27, 121}, { -25, 118}},
{{ -2, 59}, { -21, 106}, { -27, 117}, { -26, 117}},
{{ -5, 75}, { -18, 103}, { -17, 102}, { -24, 113}},
{{ -3, 62}, { -21, 107}, { -26, 117}, { -28, 118}},
{{ -4, 58}, { -23, 108}, { -27, 116}, { -31, 120}},
{{ -9, 66}, { -26, 112}, { -33, 122}, { -37, 124}},
{{ -1, 79}, { -10, 96}, { -10, 95}, { -10, 94}},
{{0, 71}, { -12, 95}, { -14, 100}, { -15, 102}},
{{3, 68}, { -5, 91}, { -8, 95}, { -10, 99}},
{{10, 44}, { -9, 93}, { -17, 111}, { -13, 106}},
{{ -7, 62}, { -22, 94}, { -28, 114}, { -50, 127}},
{{15, 36}, { -5, 86}, { -6, 89}, { -5, 92}},
{{14, 40}, {9, 67}, { -2, 80}, {17, 57}},
{{16, 27}, { -4, 80}, { -4, 82}, { -5, 86}},
{{12, 29}, { -10, 85}, { -9, 85}, { -13, 94}},
{{1, 44}, { -1, 70}, { -8, 81}, { -12, 91}},
{{20, 36}, {7, 60}, { -1, 72}, { -2, 77}},
{{18, 32}, {9, 58}, {5, 64}, {0, 71}},
{{5, 42}, {5, 61}, {1, 67}, { -1, 73}},
{{1, 48}, {12, 50}, {9, 56}, {4, 64}},
{{10, 62}, {15, 50}, {0, 69}, { -7, 81}},
{{17, 46}, {18, 49}, {1, 69}, {5, 64}},
{{9, 64}, {17, 54}, {7, 69}, {15, 57}},
{{ -12, 104}, {10, 41}, { -7, 69}, {1, 67}},
{{ -11, 97}, {7, 46}, { -6, 67}, {0, 68}},
{{ -16, 96}, { -1, 51}, { -16, 77}, { -10, 67}},
{{ -7, 88}, {7, 49}, { -2, 64}, {1, 68}},
{{ -8, 85}, {8, 52}, {2, 61}, {0, 77}},
{{ -7, 85}, {9, 41}, { -6, 67}, {2, 64}},
{{ -9, 85}, {6, 47}, { -3, 64}, {0, 68}},
{{ -13, 88}, {2, 55}, {2, 57}, { -5, 78}},
{{4, 66}, {13, 41}, { -3, 65}, {7, 55}},
{{ -3, 77}, {10, 44}, { -3, 66}, {5, 59}},
{{ -3, 76}, {6, 50}, {0, 62}, {2, 65}},
{{ -6, 76}, {5, 53}, {9, 51}, {14, 54}},
{{10, 58}, {13, 49}, { -1, 66}, {15, 44}},
{{ -1, 76}, {4, 63}, { -2, 71}, {5, 60}},
{{ -1, 83}, {6, 64}, { -2, 75}, {2, 70}},
{{ -7, 99}, { -2, 69}, { -1, 70}, { -2, 76}},
{{ -14, 95}, { -2, 59}, { -9, 72}, { -18, 86}},
{{2, 95}, {6, 70}, {14, 60}, {12, 70}},
{{0, 76}, {10, 44}, {16, 37}, {5, 64}},
{{ -5, 74}, {9, 31}, {0, 47}, { -12, 70}},
{{0, 70}, {12, 43}, {18, 35}, {11, 55}},
{{ -11, 75}, {3, 53}, {11, 37}, {5, 56}},
{{1, 68}, {14, 34}, {12, 41}, {0, 69}},
{{0, 65}, {10, 38}, {10, 41}, {2, 65}},
{{ -14, 73}, { -3, 52}, {2, 48}, { -6, 74}},
{{3, 62}, {13, 40}, {12, 41}, {5, 54}},
{{4, 62}, {17, 32}, {13, 41}, {7, 54}},
{{ -1, 68}, {7, 44}, {0, 59}, { -6, 76}},
{{ -13, 75}, {7, 38}, {3, 50}, { -11, 82}},
{{11, 55}, {13, 50}, {19, 40}, { -2, 77}},
{{5, 64}, {10, 57}, {3, 66}, { -2, 77}},
{{12, 70}, {26, 43}, {18, 50}, {25, 42}},
//338-398 Table9-23
{{15, 6}, {14, 11}, {19, -6}, {17, -13}},
{{6, 19}, {11, 14}, {18, -6}, {16, -9}},
{{7, 16}, {9, 11}, {14, 0}, {17, -12}},
{{12, 14}, {18, 11}, {26, -12}, {27, -21}},
{{18, 13}, {21, 9}, {31, -16}, {37, -30}},
{{13, 11}, {23, -2}, {33, -25}, {41, -40}},
{{13, 15}, {32, -15}, {33, -22}, {42, -41}},
{{15, 16}, {32, -15}, {37, -28}, {48, -47}},
{{12, 23}, {34, -21}, {39, -30}, {39, -32}},
{{13, 23}, {39, -23}, {42, -30}, {46, -40}},
{{15, 20}, {42, -33}, {47, -42}, {52, -51}},
{{14, 26}, {41, -31}, {45, -36}, {46, -41}},
{{14, 44}, {46, -28}, {49, -34}, {52, -39}},
{{17, 40}, {38, -12}, {41, -17}, {43, -19}},
{{17, 47}, {21, 29}, {32, 9}, {32, 11}},
{{24, 17}, {45, -24}, {69, -71}, {61, -55}},
{{21, 21}, {53, -45}, {63, -63}, {56, -46}},
{{25, 22}, {48, -26}, {66, -64}, {62, -50}},
{{31, 27}, {65, -43}, {77, -74}, {81, -67}},
{{22, 29}, {43, -19}, {54, -39}, {45, -20}},
{{19, 35}, {39, -10}, {52, -35}, {35, -2}},
{{14, 50}, {30, 9}, {41, -10}, {28, 15}},
{{10, 57}, {18, 26}, {36, 0}, {34, 1}},
{{7, 63}, {20, 27}, {40, -1}, {39, 1}},
{{ -2, 77}, {0, 57}, {30, 14}, {30, 17}},
{{ -4, 82}, { -14, 82}, {28, 26}, {20, 38}},
{{ -3, 94}, { -5, 75}, {23, 37}, {18, 45}},
{{9, 69}, { -19, 97}, {12, 55}, {15, 54}},
{{ -12, 109}, { -35, 125}, {11, 65}, {0, 79}},
{{36, -35}, {27, 0}, {37, -33}, {36, -16}},
{{36, -34}, {28, 0}, {39, -36}, {37, -14}},
{{32, -26}, {31, -4}, {40, -37}, {37, -17}},
{{37, -30}, {27, 6}, {38, -30}, {32, 1}},
{{44, -32}, {34, 8}, {46, -33}, {34, 15}},
{{34, -18}, {30, 10}, {42, -30}, {29, 15}},
{{34, -15}, {24, 22}, {40, -24}, {24, 25}},
{{40, -15}, {33, 19}, {49, -29}, {34, 22}},
{{33, -7}, {22, 32}, {38, -12}, {31, 16}},
{{35, -5}, {26, 31}, {40, -10}, {35, 18}},
{{33, 0}, {21, 41}, {38, -3}, {31, 28}},
{{38, 2}, {26, 44}, {46, -5}, {33, 41}},
{{33, 13}, {23, 47}, {31, 20}, {36, 28}},
{{23, 35}, {16, 65}, {29, 30}, {27, 47}},
{{13, 58}, {14, 71}, {25, 44}, {21, 62}},
{{29, -3}, {8, 60}, {12, 48}, {18, 31}},
{{26, 0}, {6, 63}, {11, 49}, {19, 26}},
{{22, 30}, {17, 65}, {26, 45}, {36, 24}},
{{31, -7}, {21, 24}, {22, 22}, {24, 23}},
{{35, -15}, {23, 20}, {23, 22}, {27, 16}},
{{34, -3}, {26, 23}, {27, 21}, {24, 30}},
{{34, 3}, {27, 32}, {33, 20}, {31, 29}},
{{36, -1}, {28, 23}, {26, 28}, {22, 41}},
{{34, 5}, {28, 24}, {30, 24}, {22, 42}},
{{32, 11}, {23, 40}, {27, 34}, {16, 60}},
{{35, 5}, {24, 32}, {18, 42}, {15, 52}},
{{34, 12}, {28, 29}, {25, 39}, {14, 60}},
{{39, 11}, {23, 42}, {18, 50}, {3, 78}},
{{30, 29}, {19, 57}, {12, 70}, { -16, 123}},
{{34, 26}, {22, 53}, {21, 54}, {21, 53}},
{{29, 39}, {22, 61}, {14, 71}, {22, 56}},
{{19, 66}, {11, 86}, {11, 83}, {25, 61}},
{{31, 21}, {12, 40}, {25, 32}, {21, 33}},
{{31, 31}, {11, 51}, {21, 49}, {19, 50}},
{{25, 50}, {14, 59}, {21, 54}, {17, 61}},
//402-459 Table 9-24
{{ -17, 120}, { -4, 79}, { -5, 85}, { -3, 78}},
{{ -20, 112}, { -7, 71}, { -6, 81}, { -8, 74}},
{{ -18, 114}, { -5, 69}, { -10, 77}, { -9, 72}},
{{ -11, 85}, { -9, 70}, { -7, 81}, { -10, 72}},
{{ -15, 92}, { -8, 66}, { -17, 80}, { -18, 75}},
{{ -14, 89}, { -10, 68}, { -18, 73}, { -12, 71}},
{{ -26, 71}, { -19, 73}, { -4, 74}, { -11, 63}},
{{ -15, 81}, { -12, 69}, { -10, 83}, { -5, 70}},
{{ -14, 80}, { -16, 70}, { -9, 71}, { -17, 75}},
{{0, 68}, { -15, 67}, { -9, 67}, { -14, 72}},
{{ -14, 70}, { -20, 62}, { -1, 61}, { -16, 67}},
{{ -24, 56}, { -19, 70}, { -8, 66}, { -8, 53}},
{{ -23, 68}, { -16, 66}, { -14, 66}, { -14, 59}},
{{ -24, 50}, { -22, 65}, {0, 59}, { -9, 52}},
{{ -11, 74}, { -20, 63}, {2, 59}, { -11, 68}},
{{23, -13}, {9, -2}, {17, -10}, {9, -2}},
{{26, -13}, {26, -9}, {32, -13}, {30, -10}},
{{40, -15}, {33, -9}, {42, -9}, {31, -4}},
{{49, -14}, {39, -7}, {49, -5}, {33, -1}},
{{44, 3}, {41, -2}, {53, 0}, {33, 7}},
{{45, 6}, {45, 3}, {64, 3}, {31, 12}},
{{44, 34}, {49, 9}, {68, 10}, {37, 23}},
{{33, 54}, {45, 27}, {66, 27}, {31, 38}},
{{19, 82}, {36, 59}, {47, 57}, {20, 64}},
{{ -3, 75}, { -6, 66}, { -5, 71}, { -9, 71}},
{{ -1, 23}, { -7, 35}, {0, 24}, { -7, 37}},
{{1, 34}, { -7, 42}, { -1, 36}, { -8, 44}},
{{1, 43}, { -8, 45}, { -2, 42}, { -11, 49}},
{{0, 54}, { -5, 48}, { -2, 52}, { -10, 56}},
{{ -2, 55}, { -12, 56}, { -9, 57}, { -12, 59}},
{{0, 61}, { -6, 60}, { -6, 63}, { -8, 63}},
{{1, 64}, { -5, 62}, { -4, 65}, { -9, 67}},
{{0, 68}, { -8, 66}, { -4, 67}, { -6, 68}},
{{ -9, 92}, { -8, 76}, { -7, 82}, { -10, 79}},
{{ -14, 106}, { -5, 85}, { -3, 81}, { -3, 78}},
{{ -13, 97}, { -6, 81}, { -3, 76}, { -8, 74}},
{{ -15, 90}, { -10, 77}, { -7, 72}, { -9, 72}},
{{ -12, 90}, { -7, 81}, { -6, 78}, { -10, 72}},
{{ -18, 88}, { -17, 80}, { -12, 72}, { -18, 75}},
{{ -10, 73}, { -18, 73}, { -14, 68}, { -12, 71}},
{{ -9, 79}, { -4, 74}, { -3, 70}, { -11, 63}},
{{ -14, 86}, { -10, 83}, { -6, 76}, { -5, 70}},
{{ -10, 73}, { -9, 71}, { -5, 66}, { -17, 75}},
{{ -10, 70}, { -9, 67}, { -5, 62}, { -14, 72}},
{{ -10, 69}, { -1, 61}, {0, 57}, { -16, 67}},
{{ -5, 66}, { -8, 66}, { -4, 61}, { -8, 53}},
{{ -9, 64}, { -14, 66}, { -9, 60}, { -14, 59}},
{{ -5, 58}, {0, 59}, {1, 54}, { -9, 52}},
{{2, 59}, {2, 59}, {2, 58}, { -11, 68}},
{{21, -10}, {21, -13}, {17, -10}, {9, -2}},
{{24, -11}, {33, -14}, {32, -13}, {30, -10}},
{{28, -8}, {39, -7}, {42, -9}, {31, -4}},
{{28, -1}, {46, -2}, {49, -5}, {33, -1}},
{{29, 3}, {51, 2}, {53, 0}, {33, 7}},
{{29, 9}, {60, 6}, {64, 3}, {31, 12}},
{{35, 20}, {61, 17}, {68, 10}, {37, 23}},
{{29, 36}, {55, 34}, {66, 27}, {31, 38}},
{{14, 67}, {42, 62}, {47, 57}, {20, 64}},
};
/*
 * Table 9-44: specification of rangeTabLPS depending on pStateIdx and qCodIRangeIdx.
 *
 * 64 rows (one per pStateIdx, 0..63) by 4 columns (one per qCodIRangeIdx, 0..3).
 * Every value fits in a uint8_t. The last row ({2, 2, 2, 2}) is the only row whose
 * four columns are identical.
 */
const uint8_t g_kuiCabacRangeLps[64][4] = {
{ 128, 176, 208, 240}, { 128, 167, 197, 227}, { 128, 158, 187, 216}, { 123, 150, 178, 205}, { 116, 142, 169, 195}, { 111, 135, 160, 185}, { 105, 128, 152, 175}, { 100, 122, 144, 166},
{ 95, 116, 137, 158}, { 90, 110, 130, 150}, { 85, 104, 123, 142}, { 81, 99, 117, 135}, { 77, 94, 111, 128}, { 73, 89, 105, 122}, { 69, 85, 100, 116}, { 66, 80, 95, 110},
{ 62, 76, 90, 104}, { 59, 72, 86, 99}, { 56, 69, 81, 94}, { 53, 65, 77, 89}, { 51, 62, 73, 85}, { 48, 59, 69, 80}, { 46, 56, 66, 76}, { 43, 53, 63, 72},
{ 41, 50, 59, 69}, { 39, 48, 56, 65}, { 37, 45, 54, 62}, { 35, 43, 51, 59}, { 33, 41, 48, 56}, { 32, 39, 46, 53}, { 30, 37, 43, 50}, { 29, 35, 41, 48},
{ 27, 33, 39, 45}, { 26, 31, 37, 43}, { 24, 30, 35, 41}, { 23, 28, 33, 39}, { 22, 27, 32, 37}, { 21, 26, 30, 35}, { 20, 24, 29, 33}, { 19, 23, 27, 31},
{ 18, 22, 26, 30}, { 17, 21, 25, 28}, { 16, 20, 23, 27}, { 15, 19, 22, 25}, { 14, 18, 21, 24}, { 14, 17, 20, 23}, { 13, 16, 19, 22}, { 12, 15, 18, 21},
{ 12, 14, 17, 20}, { 11, 14, 16, 19}, { 11, 13, 15, 18}, { 10, 12, 15, 17}, { 10, 12, 14, 16}, { 9, 11, 13, 15}, { 9, 11, 12, 14}, { 8, 10, 12, 14},
{ 8, 9, 11, 13}, { 7, 9, 11, 12}, { 7, 9, 10, 12}, { 7, 8, 10, 11}, { 6, 8, 9, 11}, { 6, 7, 9, 10}, { 6, 7, 8, 9}, { 2, 2, 2, 2}
};
/*
 * Table 9-45: state transition table.
 *
 * 64 rows, one per current state index, each holding two successor state indices.
 * NOTE(review): column 1 equals (state + 1) saturating at 62 and column 0 never
 * exceeds the current state, so column 0 looks like the successor after a
 * least-probable symbol and column 1 the successor after a most-probable symbol --
 * confirm against the specification table before relying on this.
 * State 63 maps to itself in both columns.
 */
const uint8_t g_kuiStateTransTable[64][2] = {
{0, 1}, {0, 2}, {1, 3}, {2, 4}, {2, 5}, {4, 6}, {4, 7}, {5, 8}, {6, 9}, {7, 10},
{8, 11}, {9, 12}, {9, 13}, {11, 14}, {11, 15}, {12, 16}, {13, 17}, {13, 18}, {15, 19}, {15, 20},
{16, 21}, {16, 22}, {18, 23}, {18, 24}, {19, 25}, {19, 26}, {21, 27}, {21, 28}, {22, 29}, {22, 30},
{23, 31}, {24, 32}, {24, 33}, {25, 34}, {26, 35}, {26, 36}, {27, 37}, {27, 38}, {28, 39}, {29, 40},
{29, 41}, {30, 42}, {30, 43}, {30, 44}, {31, 45}, {32, 46}, {32, 47}, {33, 48}, {33, 49}, {33, 50},
{34, 51}, {34, 52}, {35, 53}, {35, 54}, {35, 55}, {36, 56}, {36, 57}, {36, 58}, {37, 59}, {37, 60},
{37, 61}, {38, 62}, {38, 62}, {63, 63}
};
// extern at svc_enc_golomb.h, golomb_common.h
//
// Lookup table of code lengths in bits, indexed by a value in 0..255.
// Entry i holds 2 * floor(log2(i + 1)) + 1, i.e. 1 entry of 1, 2 of 3, 4 of 5,
// 8 of 7, 16 of 9, 32 of 11, 64 of 13, 128 of 15 and a final 17 for index 255.
// That formula is the bit length of an unsigned Exp-Golomb code word for value i.
// The trailing "//NN" markers give the index of the last entry on that row.
const uint32_t g_kuiGolombUELength[256] = {
1, 3, 3, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7, //14
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, //30
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,//46
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,//62
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,//
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
17
};
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
}

View File

@ -0,0 +1,111 @@
/*!
* \copy
* Copyright (c) 2009-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file copy_mb.cpp
*
* \brief copy MB YUV data
*
* \date 2014.04.14 Created
*
*************************************************************************************
*/
#include "copy_mb.h"
#include "macros.h"
#include "ls_defines.h"
/****************************************************************************
* Copy functions
****************************************************************************/
/*!
 * \brief   Copy a block of 4 rows, one ST32/LD32 transfer per row.
 * \param   pDst      first row of the destination block
 * \param   iStrideD  offset in bytes between consecutive destination rows
 * \param   pSrc      first row of the source block
 * \param   iStrideS  offset in bytes between consecutive source rows
 */
void WelsCopy4x4_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS) {
  // Rows are handled top to bottom; each row is one load followed by one store.
  for (int32_t iRow = 0; iRow < 4; ++iRow) {
    ST32 (pDst + iRow * iStrideD, LD32 (pSrc + iRow * iStrideS));
  }
}
/*!
 * \brief   Copy a block 8 bytes wide and 4 rows high as two side-by-side 4x4 copies.
 * \param   pDst      first row of the destination block
 * \param   iStrideD  offset in bytes between consecutive destination rows
 * \param   pSrc      first row of the source block
 * \param   iStrideS  offset in bytes between consecutive source rows
 */
void WelsCopy8x4_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS) {
  // Left 4x4 first (column offset 0), then the right 4x4 (column offset 4).
  for (int32_t iColOffset = 0; iColOffset < 8; iColOffset += 4) {
    WelsCopy4x4_c (pDst + iColOffset, iStrideD, pSrc + iColOffset, iStrideS);
  }
}
/*!
 * \brief   Copy a block 4 bytes wide and 8 rows high as two stacked 4x4 copies.
 * \param   pDst      first row of the destination block
 * \param   iStrideD  offset in bytes between consecutive destination rows
 * \param   pSrc      first row of the source block
 * \param   iStrideS  offset in bytes between consecutive source rows
 */
void WelsCopy4x8_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS) {
  // Upper 4x4 first.
  WelsCopy4x4_c (pDst, iStrideD, pSrc, iStrideS);

  // Lower 4x4 starts four rows further down in both buffers.
  uint8_t* pDstLower = pDst + (iStrideD << 2);
  uint8_t* pSrcLower = pSrc + (iStrideS << 2);
  WelsCopy4x4_c (pDstLower, iStrideD, pSrcLower, iStrideS);
}
/*!
 * \brief   Copy a block 8 bytes wide and 8 rows high.
 * \param   pDst      first row of the destination block
 * \param   iStrideD  offset in bytes between consecutive destination rows
 * \param   pSrc      first row of the source block
 * \param   iStrideS  offset in bytes between consecutive source rows
 */
void WelsCopy8x8_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS) {
  // One row per iteration: bytes 0..3 first, then bytes 4..7.
  for (int32_t iRow = 0; iRow < 8; ++iRow) {
    ST32 (pDst, LD32 (pSrc));
    ST32 (pDst + 4, LD32 (pSrc + 4));
    pDst += iStrideD;
    pSrc += iStrideS;
  }
}
/*!
 * \brief   Copy a block 8 bytes wide and 16 rows high.
 * \param   pDst      first row of the destination block
 * \param   iStrideD  offset in bytes between consecutive destination rows
 * \param   pSrc      first row of the source block
 * \param   iStrideS  offset in bytes between consecutive source rows
 */
void WelsCopy8x16_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS) {
  // One row per iteration: bytes 0..3 first, then bytes 4..7.
  for (int32_t iRow = 0; iRow < 16; ++iRow) {
    ST32 (pDst, LD32 (pSrc));
    ST32 (pDst + 4, LD32 (pSrc + 4));
    pDst += iStrideD;
    pSrc += iStrideS;
  }
}
/*!
 * \brief   Copy a block 16 bytes wide and 8 rows high.
 * \param   pDst      first row of the destination block
 * \param   iStrideD  offset in bytes between consecutive destination rows
 * \param   pSrc      first row of the source block
 * \param   iStrideS  offset in bytes between consecutive source rows
 */
void WelsCopy16x8_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS) {
  for (int32_t iRow = 0; iRow < 8; ++iRow) {
    // Four 4-byte transfers, left to right, cover the 16-byte row.
    for (int32_t iCol = 0; iCol < 16; iCol += 4) {
      ST32 (pDst + iCol, LD32 (pSrc + iCol));
    }
    pDst += iStrideD;
    pSrc += iStrideS;
  }
}
/*!
 * \brief   Copy a block 16 bytes wide and 16 rows high.
 * \param   pDst      first row of the destination block
 * \param   iStrideD  offset in bytes between consecutive destination rows
 * \param   pSrc      first row of the source block
 * \param   iStrideS  offset in bytes between consecutive source rows
 */
void WelsCopy16x16_c (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS) {
  for (int32_t iRow = 0; iRow < 16; ++iRow) {
    // Four 4-byte transfers, left to right, cover the 16-byte row.
    for (int32_t iCol = 0; iCol < 16; iCol += 4) {
      ST32 (pDst + iCol, LD32 (pSrc + iCol));
    }
    pDst += iStrideD;
    pSrc += iStrideS;
  }
}

View File

@ -0,0 +1,359 @@
/*!
* \copy
* Copyright (c) 2009-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file cpu.cpp
*
* \brief CPU compatibility detection
*
* \date 04/29/2009 Created
*
*************************************************************************************
*/
#include <string.h>
#include <stdio.h>
#ifdef ANDROID_NDK
#include <cpu-features.h>
#endif
#include "cpu.h"
#include "cpu_core.h"
#define CPU_Vendor_AMD "AuthenticAMD"
#define CPU_Vendor_INTEL "GenuineIntel"
#define CPU_Vendor_CYRIX "CyrixInstead"
#if defined(X86_ASM)
/*!
 * \brief   Detect x86 CPU capabilities through CPUID.
 *
 * \param   pNumberOfLogicProcessors  optional output (may be NULL). When non-NULL it
 *                                    receives the logical processor count derived from
 *                                    CPUID: bits 23-16 of leaf 1 EBX when HTT is
 *                                    reported, otherwise 0; on Intel parts with leaf 4
 *                                    available and EAX != 0 it is replaced by
 *                                    (EAX[31:26] + 1).
 *
 * \return  bitmask of WELS_CPU_* flags; 0 when CPUID is unavailable, when the maximum
 *          basic leaf is 0, or when MMX is not reported.
 *
 * \note    The output parameter is left untouched on the early "return 0" paths.
 */
uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
  uint32_t uiCPU = 0;
  uint32_t uiFeatureA = 0, uiFeatureB = 0, uiFeatureC = 0, uiFeatureD = 0;
  uint32_t uiLeaf1FeatureB = 0; // EBX of CPUID leaf 1, preserved across later CPUID calls
  int32_t  CacheLineSize = 0;
  int8_t   chVendorName[16] = { 0 };
  uint32_t uiMaxCpuidLevel = 0;

  if (!WelsCPUIdVerify()) {
    /* cpuid is not supported in cpu */
    return 0;
  }

  // Leaf 0: the three output registers are stored at offsets 0, 8 and 4 so that the
  // 12 vendor characters end up contiguous; the zero-initialised tail terminates them.
  WelsCPUId (0, &uiFeatureA, (uint32_t*)&chVendorName[0], (uint32_t*)&chVendorName[8], (uint32_t*)&chVendorName[4]);
  uiMaxCpuidLevel = uiFeatureA;
  if (uiMaxCpuidLevel == 0) {
    /* maximum input value for basic cpuid information */
    return 0;
  }

  WelsCPUId (1, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
  // Keep leaf 1 EBX: uiFeatureB is overwritten by the leaf 7 query further down, but
  // the logical processor count must come from leaf 1.
  uiLeaf1FeatureB = uiFeatureB;
  if ((uiFeatureD & 0x00800000) == 0) {
    /* Basic MMX technology is not support in cpu, mean nothing for us so return here */
    return 0;
  }

  uiCPU = WELS_CPU_MMX;
  if (uiFeatureD & 0x02000000) {
    /* SSE technology is identical to AMD MMX extensions */
    uiCPU |= WELS_CPU_MMXEXT | WELS_CPU_SSE;
  }
  if (uiFeatureD & 0x04000000) {
    /* SSE2 support here */
    uiCPU |= WELS_CPU_SSE2;
  }
  if (uiFeatureD & 0x00000001) {
    /* x87 FPU on-chip checking */
    uiCPU |= WELS_CPU_FPU;
  }
  if (uiFeatureD & 0x00008000) {
    /* CMOV instruction checking */
    uiCPU |= WELS_CPU_CMOV;
  }
  if ((!strcmp ((const char*)chVendorName, CPU_Vendor_INTEL)) ||
      (!strcmp ((const char*)chVendorName, CPU_Vendor_AMD))) { // confirmed_safe_unsafe_usage
    if (uiFeatureD & 0x10000000) {
      /* Multi-Threading checking: contains of multiple logic processors */
      uiCPU |= WELS_CPU_HTT;
    }
  }

  if (uiFeatureC & 0x00000001) {
    /* SSE3 support here */
    uiCPU |= WELS_CPU_SSE3;
  }
  if (uiFeatureC & 0x00000200) {
    /* SSSE3 support here */
    uiCPU |= WELS_CPU_SSSE3;
  }
  if (uiFeatureC & 0x00080000) {
    /* SSE4.1 support here, 45nm Penryn processor */
    uiCPU |= WELS_CPU_SSE41;
  }
  if (uiFeatureC & 0x00100000) {
    /* SSE4.2 support here, next generation Nehalem processor */
    uiCPU |= WELS_CPU_SSE42;
  }
  if (WelsCPUSupportAVX (uiFeatureA, uiFeatureC)) {
    /* AVX supported */
    uiCPU |= WELS_CPU_AVX;
  }
  if (WelsCPUSupportFMA (uiFeatureA, uiFeatureC)) {
    /* AVX FMA supported */
    uiCPU |= WELS_CPU_FMA;
  }
  if (uiFeatureC & 0x02000000) {
    /* AES checking */
    uiCPU |= WELS_CPU_AES;
  }
  if (uiFeatureC & 0x00400000) {
    /* MOVBE checking */
    uiCPU |= WELS_CPU_MOVBE;
  }

  if (uiMaxCpuidLevel >= 7) {
    uiFeatureC = 0;
    WelsCPUId (7, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
    if ((uiCPU & WELS_CPU_AVX) && (uiFeatureB & 0x00000020)) {
      /* AVX2 supported */
      uiCPU |= WELS_CPU_AVX2;
    }
  }

  if (pNumberOfLogicProcessors != NULL) {
    if (uiCPU & WELS_CPU_HTT) {
      // feature bits: 23-16 on the EBX returned by leaf 1 (not the leaf 7 EBX above)
      *pNumberOfLogicProcessors = (uiLeaf1FeatureB & 0x00ff0000) >> 16;
    } else {
      *pNumberOfLogicProcessors = 0;
    }
    if (!strcmp ((const char*)chVendorName, CPU_Vendor_INTEL)) {
      if (uiMaxCpuidLevel >= 4) {
        uiFeatureC = 0;
        WelsCPUId (0x4, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
        if (uiFeatureA != 0) {
          *pNumberOfLogicProcessors = ((uiFeatureA & 0xfc000000) >> 26) + 1;
        }
      }
    }
  }

  // Extended leaves: only queried for feature bits on AMD parts.
  WelsCPUId (0x80000000, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);

  if ((!strcmp ((const char*)chVendorName, CPU_Vendor_AMD))
      && (uiFeatureA >= 0x80000001)) { // confirmed_safe_unsafe_usage
    WelsCPUId (0x80000001, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
    if (uiFeatureD & 0x00400000) {
      uiCPU |= WELS_CPU_MMXEXT;
    }
    if (uiFeatureD & 0x80000000) {
      uiCPU |= WELS_CPU_3DNOW;
    }
  }

  if (!strcmp ((const char*)chVendorName, CPU_Vendor_INTEL)) { // confirmed_safe_unsafe_usage
    int32_t family, model;

    WelsCPUId (1, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
    family = ((uiFeatureA >> 8) & 0xf) + ((uiFeatureA >> 20) & 0xff);
    model  = ((uiFeatureA >> 4) & 0xf) + ((uiFeatureA >> 12) & 0xf0);

    // SSE2/SSE3 are deliberately masked out for family 6, models 9, 13 and 14.
    if ((family == 6) && (model == 9 || model == 13 || model == 14)) {
      uiCPU &= ~ (WELS_CPU_SSE2 | WELS_CPU_SSE3);
    }
  }

  // get cache line size
  if ((!strcmp ((const char*)chVendorName, CPU_Vendor_INTEL))
      || ! (strcmp ((const char*)chVendorName, CPU_Vendor_CYRIX))) { // confirmed_safe_unsafe_usage
    WelsCPUId (1, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);

    CacheLineSize = (uiFeatureB & 0xff00) >>
                    5; // ((clflush_line_size >> 8) << 3), CLFLUSH_line_size * 8 = CacheLineSize_in_byte

    if (CacheLineSize == 128) {
      uiCPU |= WELS_CPU_CACHELINE_128;
    } else if (CacheLineSize == 64) {
      uiCPU |= WELS_CPU_CACHELINE_64;
    } else if (CacheLineSize == 32) {
      uiCPU |= WELS_CPU_CACHELINE_32;
    } else if (CacheLineSize == 16) {
      uiCPU |= WELS_CPU_CACHELINE_16;
    }
  }

  return uiCPU;
}
/*!
 * \brief   Call WelsEmms() when the given feature mask contains any of the
 *          MMX / MMXEXT / 3DNOW / 3DNOWEXT flags; otherwise do nothing.
 * \param   kuiCPU  WELS_CPU_* feature bitmask
 */
void WelsCPURestore (const uint32_t kuiCPU) {
  const bool kbUsesMmxState = (kuiCPU & (WELS_CPU_MMX | WELS_CPU_MMXEXT | WELS_CPU_3DNOW | WELS_CPU_3DNOWEXT)) != 0;
  if (!kbUsesMmxState) {
    return;
  }
  WelsEmms();
}
#elif defined(HAVE_NEON) //For supporting both android platform and iOS platform
#if defined(ANDROID_NDK)
uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
uint32_t uiCPU = 0;
AndroidCpuFamily cpuFamily = ANDROID_CPU_FAMILY_UNKNOWN;
uint64_t uiFeatures = 0;
cpuFamily = android_getCpuFamily();
if (cpuFamily == ANDROID_CPU_FAMILY_ARM) {
uiFeatures = android_getCpuFeatures();
if (uiFeatures & ANDROID_CPU_ARM_FEATURE_ARMv7) {
uiCPU |= WELS_CPU_ARMv7;
}
if (uiFeatures & ANDROID_CPU_ARM_FEATURE_VFPv3) {
uiCPU |= WELS_CPU_VFPv3;
}
if (uiFeatures & ANDROID_CPU_ARM_FEATURE_NEON) {
uiCPU |= WELS_CPU_NEON;
}
}
if (pNumberOfLogicProcessors != NULL) {
*pNumberOfLogicProcessors = android_getCpuCount();
}
return uiCPU;
}
#elif defined(__APPLE__)
/*!
 * \brief   Apple/NEON build: features are decided at compile time.
 * \param   pNumberOfLogicProcessors  not read and not written by this variant
 * \return  WELS_CPU_ARMv7 | WELS_CPU_VFPv3 | WELS_CPU_NEON when __ARM_NEON__ is
 *          defined, otherwise 0
 */
uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
#if defined(__ARM_NEON__)
  const uint32_t kuiFeatures = WELS_CPU_ARMv7 | WELS_CPU_VFPv3 | WELS_CPU_NEON;
#else
  const uint32_t kuiFeatures = 0;
#endif
  return kuiFeatures;
}
#elif defined(__linux__)
/*!
 * \brief   Generic arm/linux cpu feature detection based on /proc/cpuinfo.
 *
 * Only the first line starting with "Features" is inspected. The tokens are matched
 * with a space on both sides (" neon ", " asimd ", " vfpv3 ", " fp ").
 *
 * \param   pNumberOfLogicProcessors  not read and not written by this variant
 * \return  combination of WELS_CPU_NEON / WELS_CPU_VFPv3. When /proc/cpuinfo cannot
 *          be opened the result is 0, or WELS_CPU_NEON on __chromeos__ builds.
 */
uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
  int iDetected = 0;
#if defined(__chromeos__)
  // Chrome OS builds report NEON unconditionally.
  iDetected |= WELS_CPU_NEON;
#endif

  FILE* pCpuInfo = fopen ("/proc/cpuinfo", "r");
  if (pCpuInfo == NULL) {
    return iDetected;
  }

  char szLine[200];
  while (fgets (szLine, sizeof (szLine), pCpuInfo) != NULL) {
    if (strncmp (szLine, "Features", strlen ("Features")) != 0) {
      continue;
    }
    // The asimd and fp features are listed on 64 bit ARMv8 kernels
    const bool kbHasNeon = (strstr (szLine, " neon ") != NULL) || (strstr (szLine, " asimd ") != NULL);
    const bool kbHasVfp  = (strstr (szLine, " vfpv3 ") != NULL) || (strstr (szLine, " fp ") != NULL);
    if (kbHasNeon) {
      iDetected |= WELS_CPU_NEON;
    }
    if (kbHasVfp) {
      iDetected |= WELS_CPU_VFPv3;
    }
    break; // only the first "Features" line counts
  }

  fclose (pCpuInfo);
  return iDetected;
}
#else /* HAVE_NEON enabled but no runtime detection */
/*!
 * \brief   HAVE_NEON build without any runtime detection mechanism.
 *
 * No runtime feature detection is available, but the library was built with
 * HAVE_NEON, so NEON and all associated features are assumed to be present.
 *
 * \param   pNumberOfLogicProcessors  not read and not written by this variant
 * \return  WELS_CPU_ARMv7 | WELS_CPU_VFPv3 | WELS_CPU_NEON
 */
uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
  const uint32_t kuiAssumedFeatures = WELS_CPU_ARMv7 | WELS_CPU_VFPv3 | WELS_CPU_NEON;
  return kuiAssumedFeatures;
}
#endif
#elif defined(HAVE_NEON_AARCH64)
/*!
 * \brief   AArch64 build: no runtime detection is performed.
 *
 * For AArch64 no runtime detection is necessary for now, since NEON and VFPv3 are
 * mandatory on all such CPUs. (/proc/cpuinfo doesn't contain neon, and the android
 * cpufeatures library doesn't return it either.)
 *
 * \param   pNumberOfLogicProcessors  not read and not written by this variant
 * \return  WELS_CPU_VFPv3 | WELS_CPU_NEON
 */
uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
  const uint32_t kuiMandatoryFeatures = WELS_CPU_VFPv3 | WELS_CPU_NEON;
  return kuiMandatoryFeatures;
}
#elif defined(mips)
/*!
 * \brief   MIPS (Loongson) feature detection based on /proc/cpuinfo.
 *
 * Two scans run one after the other on the same stream:
 *   1. the first "model name" line sets WELS_CPU_MMI when it names a
 *      Loongson-3A, Loongson-3B or Loongson-2K;
 *   2. the first "ASEs implemented" line found AFTER that point sets WELS_CPU_MMI
 *      when it lists both "loongson-mmi" and "loongson-ext", and WELS_CPU_MSA when
 *      it contains "msa".
 * Because the second scan resumes where the first one stopped, it sees nothing when
 * the file has no "model name" line (the first scan has then consumed the file).
 *
 * \return  combination of WELS_CPU_MMI / WELS_CPU_MSA; 0 when built without
 *          __linux__ or when /proc/cpuinfo cannot be opened
 */
static uint32_t get_cpu_flags_from_cpuinfo(void)
{
  uint32_t uiDetected = 0;
# ifdef __linux__
  FILE* pCpuInfo = fopen("/proc/cpuinfo", "r");
  if (pCpuInfo == NULL)
    return uiDetected;

  char szLine[200] = { 0 };

  // Scan 1: CPU model.
  while (fgets(szLine, sizeof(szLine), pCpuInfo) != NULL) {
    if (strncmp(szLine, "model name", strlen("model name")) != 0)
      continue;
    const bool kbKnownLoongson = (strstr(szLine, "Loongson-3A") != NULL)
                                 || (strstr(szLine, "Loongson-3B") != NULL)
                                 || (strstr(szLine, "Loongson-2K") != NULL);
    if (kbKnownLoongson)
      uiDetected |= WELS_CPU_MMI;
    break;
  }

  // Scan 2: implemented ASEs, continuing from the current stream position.
  while (fgets(szLine, sizeof(szLine), pCpuInfo) != NULL) {
    if (strncmp(szLine, "ASEs implemented", strlen("ASEs implemented")) != 0)
      continue;
    const bool kbMmiAndExt = (strstr(szLine, "loongson-mmi") != NULL)
                             && (strstr(szLine, "loongson-ext") != NULL);
    if (kbMmiAndExt)
      uiDetected |= WELS_CPU_MMI;
    if (strstr(szLine, "msa") != NULL)
      uiDetected |= WELS_CPU_MSA;
    break;
  }

  fclose(pCpuInfo);
# endif
  return uiDetected;
}
/*!
 * \brief   MIPS build: report the flags gathered by get_cpu_flags_from_cpuinfo().
 * \param   pNumberOfLogicProcessors  not read and not written by this variant
 * \return  whatever get_cpu_flags_from_cpuinfo() returns
 */
uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
  const uint32_t kuiCpuInfoFlags = get_cpu_flags_from_cpuinfo();
  return kuiCpuInfoFlags;
}
#else /* Neither X86_ASM, HAVE_NEON, HAVE_NEON_AARCH64 nor mips */
/*!
 * \brief   Fallback for builds with none of X86_ASM, HAVE_NEON, HAVE_NEON_AARCH64 or mips.
 * \param   pNumberOfLogicProcessors  not read and not written by this variant
 * \return  always 0 (no optional CPU features reported)
 */
uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
  const uint32_t kuiNoFeatures = 0;
  return kuiNoFeatures;
}
#endif

View File

@ -0,0 +1,266 @@
/*!
* \copy
* Copyright (c) 2009-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file crt_utils_safe_x.cpp
*
* \brief common tool/function utilization
*
* \date 03/10/2009 Created
*
*************************************************************************************
*/
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#if defined(_WIN32)
#include <windows.h>
#include <sys/types.h>
#include <sys/timeb.h>
#ifndef _MSC_VER
#include <sys/time.h>
#endif //!_MSC_VER
#else
#include <sys/time.h>
#endif //_WIN32
#include "macros.h"
#include "crt_util_safe_x.h" // Safe CRT routines like utils for cross platforms
#if defined(_WIN32) && defined(_MSC_VER)
#if defined(_MSC_VER) && (_MSC_VER>=1500)
/*!
 * \brief   printf-style formatting into a bounded buffer (MSVC >= 1500 variant,
 *          built on vsnprintf_s with _TRUNCATE).
 * \param   pBuffer        destination buffer
 * \param   iSizeOfBuffer  size of pBuffer in bytes
 * \param   kpFormat       printf-style format string, followed by its arguments
 * \return  the value returned by vsnprintf_s, or iSizeOfBuffer when that value is negative
 */
int32_t WelsSnprintf (char* pBuffer, int32_t iSizeOfBuffer, const char* kpFormat, ...) {
  va_list vaArgs;
  va_start (vaArgs, kpFormat);
  const int32_t kiWritten = vsnprintf_s (pBuffer, iSizeOfBuffer, _TRUNCATE, kpFormat, vaArgs);
  va_end (vaArgs);

  // A negative result is reported to the caller as the full buffer size.
  return (kiWritten < 0) ? iSizeOfBuffer : kiWritten;
}
/*!
 * \brief   Bounded string copy (MSVC >= 1500 variant, built on strncpy_s with _TRUNCATE).
 * \param   pDest         destination buffer
 * \param   iSizeInBytes  size of pDest in bytes
 * \param   kpSrc         string to copy
 * \return  pDest
 */
char* WelsStrncpy (char* pDest, int32_t iSizeInBytes, const char* kpSrc) {
  // The status returned by strncpy_s is intentionally not inspected.
  (void) strncpy_s (pDest, iSizeInBytes, kpSrc, _TRUNCATE);
  return pDest;
}
/*!
 * \brief   va_list flavour of WelsSnprintf (MSVC >= 1500 variant, built on
 *          vsnprintf_s with _TRUNCATE).
 * \param   pBuffer        destination buffer
 * \param   iSizeOfBuffer  size of pBuffer in bytes
 * \param   kpFormat       printf-style format string
 * \param   pArgPtr        arguments for kpFormat
 * \return  the value returned by vsnprintf_s, or iSizeOfBuffer when that value is negative
 */
int32_t WelsVsnprintf (char* pBuffer, int32_t iSizeOfBuffer, const char* kpFormat, va_list pArgPtr) {
  const int32_t kiWritten = vsnprintf_s (pBuffer, iSizeOfBuffer, _TRUNCATE, kpFormat, pArgPtr);
  return (kiWritten < 0) ? iSizeOfBuffer : kiWritten;
}
/*!
 * \brief   Open a file (MSVC >= 1500 variant, built on fopen_s).
 * \param   kpFilename  path of the file to open
 * \param   kpMode      fopen-style mode string
 * \return  the opened handle, or NULL when fopen_s reports a non-zero status
 */
WelsFileHandle* WelsFopen (const char* kpFilename, const char* kpMode) {
  WelsFileHandle* pOpened = NULL;
  const bool kbOpened = (fopen_s (&pOpened, kpFilename, kpMode) == 0);
  return kbOpened ? pOpened : NULL;
}
/*!
 * \brief   Close a file handle.
 * \param   pFp  handle to close
 * \return  the value returned by fclose
 */
int32_t WelsFclose (WelsFileHandle* pFp) {
  const int32_t kiStatus = fclose (pFp);
  return kiStatus;
}
/*!
 * \brief   Fetch the current time (MSVC >= 1500 variant, built on _ftime_s).
 * \param   pTp  receives the current time
 * \return  the value returned by _ftime_s
 */
int32_t WelsGetTimeOfDay (SWelsTime* pTp) {
  const int32_t kiStatus = _ftime_s (pTp);
  return kiStatus;
}
/*!
 * \brief   Format kpTp->time as local time with strftime (MSVC >= 1500 variant,
 *          built on localtime_s).
 * \param   pBuffer   destination buffer
 * \param   iSize     size of pBuffer in bytes
 * \param   kpFormat  strftime format string
 * \param   kpTp      time to format
 * \return  the value returned by strftime; when it is 0, pBuffer[0] is set to '\0'
 */
int32_t WelsStrftime (char* pBuffer, int32_t iSize, const char* kpFormat, const SWelsTime* kpTp) {
  struct tm sLocal;
  localtime_s (&sLocal, &kpTp->time);

  const int32_t kiWritten = (int32_t)strftime (pBuffer, iSize, kpFormat, &sLocal);
  if (kiWritten != 0) {
    return kiWritten;
  }

  // Nothing was produced: hand back an empty string.
  pBuffer[0] = '\0';
  return kiWritten;
}
#else
/*!
 * \brief   printf-style formatting into a bounded buffer (older MSVC variant,
 *          built on vsnprintf).
 * \param   pBuffer        destination buffer
 * \param   iSizeOfBuffer  size of pBuffer in bytes
 * \param   kpFormat       printf-style format string, followed by its arguments
 * \return  the value returned by vsnprintf; when that value is negative the last
 *          byte of the buffer is set to '\0' and iSizeOfBuffer is returned instead
 */
int32_t WelsSnprintf (char* pBuffer, int32_t iSizeOfBuffer, const char* kpFormat, ...) {
  va_list vaArgs;
  va_start (vaArgs, kpFormat);
  int32_t iWritten = vsnprintf (pBuffer, iSizeOfBuffer, kpFormat, vaArgs); //confirmed_safe_unsafe_usage
  va_end (vaArgs);

  if (iWritten < 0) {
    // Force termination and report the buffer size.
    pBuffer[iSizeOfBuffer - 1] = '\0';
    iWritten = iSizeOfBuffer;
  }
  return iWritten;
}
/*!
 * \brief   Bounded string copy that always terminates the destination
 *          (older MSVC variant, built on strncpy).
 * \param   pDest         destination buffer
 * \param   iSizeInBytes  size of pDest in bytes; when it is <= 0 nothing is written
 * \param   kpSrc         string to copy
 * \return  pDest
 */
char* WelsStrncpy (char* pDest, int32_t iSizeInBytes, const char* kpSrc) {
  // Without this guard a size of 0 would write the terminator to pDest[-1], and a
  // negative size would turn into a huge unsigned copy length.
  if (iSizeInBytes <= 0) {
    return pDest;
  }
  strncpy (pDest, kpSrc, iSizeInBytes); //confirmed_safe_unsafe_usage
  // strncpy does not terminate when kpSrc fills the buffer; do it explicitly.
  pDest[iSizeInBytes - 1] = '\0';
  return pDest;
}
/*!
 * \brief   va_list flavour of WelsSnprintf (older MSVC variant, built on vsnprintf).
 * \param   pBuffer        destination buffer
 * \param   iSizeOfBuffer  size of pBuffer in bytes
 * \param   kpFormat       printf-style format string
 * \param   pArgPtr        arguments for kpFormat
 * \return  the value returned by vsnprintf; when that value is negative the last
 *          byte of the buffer is set to '\0' and iSizeOfBuffer is returned instead
 */
int32_t WelsVsnprintf (char* pBuffer, int32_t iSizeOfBuffer, const char* kpFormat, va_list pArgPtr) {
  const int32_t kiWritten = vsnprintf (pBuffer, iSizeOfBuffer, kpFormat, pArgPtr); //confirmed_safe_unsafe_usage
  if (kiWritten >= 0) {
    return kiWritten;
  }
  // Force termination and report the buffer size.
  pBuffer[iSizeOfBuffer - 1] = '\0';
  return iSizeOfBuffer;
}
/*!
 * \brief   Open a file with fopen.
 * \param   kpFilename  path of the file to open
 * \param   kpMode      fopen-style mode string
 * \return  the value returned by fopen
 */
WelsFileHandle* WelsFopen (const char* kpFilename, const char* kpMode) {
  WelsFileHandle* pOpened = fopen (kpFilename, kpMode);
  return pOpened;
}
/*!
 * \brief   Close a file handle.
 * \param   pFp  handle to close
 * \return  the value returned by fclose
 */
int32_t WelsFclose (WelsFileHandle* pFp) {
  const int32_t kiStatus = fclose (pFp);
  return kiStatus;
}
/*!
 * \brief   Fetch the current time (older MSVC variant, built on _ftime).
 * \param   pTp  receives the current time
 * \return  always 0
 */
int32_t WelsGetTimeOfDay (SWelsTime* pTp) {
  const int32_t kiSuccess = 0;
  _ftime (pTp);
  return kiSuccess;
}
/*!
 * \brief   Format kpTp->time as local time with strftime (older MSVC variant,
 *          built on localtime).
 * \param   pBuffer   destination buffer
 * \param   iSize     size of pBuffer in bytes
 * \param   kpFormat  strftime format string
 * \param   kpTp      time to format
 * \return  the value returned by strftime; when it is 0, pBuffer[0] is set to '\0'
 */
int32_t WelsStrftime (char* pBuffer, int32_t iSize, const char* kpFormat, const SWelsTime* kpTp) {
  struct tm* pLocal = localtime (&kpTp->time);
  const int32_t kiWritten = (int32_t)strftime (pBuffer, iSize, kpFormat, pLocal);
  if (kiWritten != 0) {
    return kiWritten;
  }

  // Nothing was produced: hand back an empty string.
  pBuffer[0] = '\0';
  return kiWritten;
}
#endif // _MSC_VER
#else //GCC
/*!
 * \brief   printf-style formatting into pBuffer (at most iSizeOfBuffer bytes).
 * \return  whatever vsnprintf() returns, unmodified.
 */
int32_t WelsSnprintf (char* pBuffer, int32_t iSizeOfBuffer, const char* kpFormat, ...) {
  va_list pArgs;
  va_start (pArgs, kpFormat);
  const int32_t kiWritten = vsnprintf (pBuffer, iSizeOfBuffer, kpFormat, pArgs);
  va_end (pArgs);
  return kiWritten;
}
/*!
 * \brief   strncpy() of kpSrc into pDest over iSizeInBytes bytes, then force a
 *          terminator into the last byte.
 * \return  pDest
 */
char* WelsStrncpy (char* pDest, int32_t iSizeInBytes, const char* kpSrc) {
  const int32_t kiLastIdx = iSizeInBytes - 1;
  strncpy (pDest, kpSrc, iSizeInBytes); //confirmed_safe_unsafe_usage
  pDest[kiLastIdx] = '\0';
  return pDest;
}
// va_list flavour of WelsSnprintf: the vsnprintf() result is returned unmodified.
int32_t WelsVsnprintf (char* pBuffer, int32_t iSizeOfBuffer, const char* kpFormat, va_list pArgPtr) {
  const int32_t kiWritten = vsnprintf (pBuffer, iSizeOfBuffer, kpFormat, pArgPtr); //confirmed_safe_unsafe_usage
  return kiWritten;
}
// Open kpFilename with mode kpMode; the fopen() result (possibly NULL) is returned as is.
WelsFileHandle* WelsFopen (const char* kpFilename, const char* kpMode) {
  WelsFileHandle* pHandle = fopen (kpFilename, kpMode);
  return pHandle;
}
// Close pFp and hand back fclose()'s status code unchanged.
int32_t WelsFclose (WelsFileHandle* pFp) {
  const int32_t kiStatus = fclose (pFp);
  return kiStatus;
}
/*!
 * \brief   Read the wall clock with gettimeofday() and store it in *pTp.
 * \param   pTp  receives the seconds (time) and milliseconds (millitm)
 * \return  0 on success, -1 when gettimeofday() fails (pTp is then left untouched)
 */
int32_t WelsGetTimeOfDay (SWelsTime* pTp) {
  struct timeval sTv;
  if (gettimeofday (&sTv, NULL)) {
    return -1;
  }
  pTp->time = sTv.tv_sec;
  // Divide first, narrow second: a cast binds tighter than '/', so the former
  // "(uint16_t)sTv.tv_usec / 1000" truncated the microsecond count to 16 bits
  // before dividing and produced a wrong millisecond value.
  pTp->millitm = (uint16_t) (sTv.tv_usec / 1000);
  return 0;
}
/*!
 * \brief   Render kpTp->time as local time into pBuffer using strftime().
 * \return  the strftime() result; when it is 0 the buffer is set to the empty string.
 */
int32_t WelsStrftime (char* pBuffer, int32_t iSize, const char* kpFormat, const SWelsTime* kpTp) {
  struct tm* pLocal = localtime (&kpTp->time);
  const int32_t kiLen = static_cast<int32_t> (strftime (pBuffer, iSize, kpFormat, pLocal));
  if (0 == kiLen)
    *pBuffer = '\0';
  return kiLen;
}
#endif
/*!
 * \brief   Append kpSrc to the string already in pDest, where uiSizeInBytes is the
 *          total size of pDest. Nothing is copied when the current length already
 *          reaches uiSizeInBytes.
 * \return  pDest when nothing is appended; otherwise the value returned by
 *          WelsStrncpy for the append position (pDest + current length).
 */
char* WelsStrcat (char* pDest, uint32_t uiSizeInBytes, const char* kpSrc) {
  const uint32_t kuiUsed = (uint32_t) strlen (pDest);
  if (kuiUsed >= uiSizeInBytes)
    return pDest;
  char* pTail = pDest + kuiUsed;
  return WelsStrncpy (pTail, uiSizeInBytes - kuiUsed, kpSrc);
}
// Write iCount items of iSize bytes from kpBuffer to pFp; returns fwrite()'s item count as int32_t.
int32_t WelsFwrite (const void* kpBuffer, int32_t iSize, int32_t iCount, WelsFileHandle* pFp) {
  const size_t kuiItems = fwrite (kpBuffer, iSize, iCount, pFp);
  return static_cast<int32_t> (kuiItems);
}
// Accessor for the millitm field of *kpTp.
uint16_t WelsGetMillisecond (const SWelsTime* kpTp) {
  const uint16_t kuiMs = kpTp->millitm;
  return kuiMs;
}
// Reposition fp via fseek (fp, offset, origin) and return its status code.
int32_t WelsFseek (WelsFileHandle* fp, int32_t offset, int32_t origin) {
  const int32_t kiStatus = fseek (fp, offset, origin);
  return kiStatus;
}
// Flush pFp and return fflush()'s status code.
int32_t WelsFflush (WelsFileHandle* pFp) {
  const int32_t kiStatus = fflush (pFp);
  return kiStatus;
}

View File

@ -0,0 +1,295 @@
#include "deblocking_common.h"
#include "macros.h"
// C code only
/*!
 * \brief   Filter 16 lines across the edge located at pPix.
 *          Samples p0..p2 sit at -1..-3 * iStrideX, q0..q2 at 0..2 * iStrideX;
 *          iStrideY moves from one filtered line to the next.
 * \param   iAlpha, iBeta  thresholds that gate the filter for each line
 * \param   pTc            clipping bounds, one entry per group of 4 lines;
 *                         a negative entry leaves its lines untouched
 */
void DeblockLumaLt4_c (uint8_t* pPix, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha, int32_t iBeta,
                       int8_t* pTc) {
  for (int32_t iLine = 0; iLine < 16; ++iLine, pPix += iStrideY) {
    const int32_t kiTc0 = pTc[iLine >> 2];
    if (kiTc0 < 0)
      continue;
    const int32_t p0 = pPix[-iStrideX];
    const int32_t p1 = pPix[-2 * iStrideX];
    const int32_t p2 = pPix[-3 * iStrideX];
    const int32_t q0 = pPix[0];
    const int32_t q1 = pPix[iStrideX];
    const int32_t q2 = pPix[2 * iStrideX];
    const bool kbFilterLine = (WELS_ABS (p0 - q0) < iAlpha) && (WELS_ABS (p1 - p0) < iBeta)
                              && (WELS_ABS (q1 - q0) < iBeta);
    if (!kbFilterLine)
      continue;
    // every modified inner sample (p1 / q1) widens the p0/q0 clipping bound by one
    int32_t iTc = kiTc0;
    if (WELS_ABS (p2 - p0) < iBeta) {
      pPix[-2 * iStrideX] = p1 + WELS_CLIP3 ((p2 + ((p0 + q0 + 1) >> 1) - (p1 * (1 << 1))) >> 1, -kiTc0, kiTc0);
      ++iTc;
    }
    if (WELS_ABS (q2 - q0) < iBeta) {
      pPix[iStrideX] = q1 + WELS_CLIP3 ((q2 + ((p0 + q0 + 1) >> 1) - (q1 * (1 << 1))) >> 1, -kiTc0, kiTc0);
      ++iTc;
    }
    const int32_t kiDelta = WELS_CLIP3 ((((q0 - p0) * (1 << 2)) + (p1 - q1) + 4) >> 3, -iTc, iTc);
    pPix[-iStrideX] = WelsClip1 (p0 + kiDelta); /* p0' */
    pPix[0] = WelsClip1 (q0 - kiDelta); /* q0' */
  }
}
// Filter 16 lines across the edge located at pPix without any tc clipping.
// Samples p0..p3 sit at -1..-4 * iStrideX and q0..q3 at 0..3 * iStrideX;
// iStrideY moves from one filtered line to the next.
// All samples of a line are read into locals before anything is written, so
// every formula below works on the unfiltered values.
void DeblockLumaEq4_c (uint8_t* pPix, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha, int32_t iBeta) {
int32_t p0, p1, p2, q0, q1, q2;
int32_t iDetaP0Q0;
bool bDetaP1P0, bDetaQ1Q0;
for (int32_t i = 0; i < 16; i++) {
p0 = pPix[-iStrideX];
p1 = pPix[-2 * iStrideX];
p2 = pPix[-3 * iStrideX];
q0 = pPix[0];
q1 = pPix[iStrideX];
q2 = pPix[2 * iStrideX];
iDetaP0Q0 = WELS_ABS (p0 - q0);
bDetaP1P0 = WELS_ABS (p1 - p0) < iBeta;
bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta;
// the line is filtered only when all three threshold tests pass
if ((iDetaP0Q0 < iAlpha) && bDetaP1P0 && bDetaQ1Q0) {
// small step across the edge: each side may get the longer filter (3 samples rewritten)
if (iDetaP0Q0 < ((iAlpha >> 2) + 2)) {
bool bDetaP2P0 = WELS_ABS (p2 - p0) < iBeta;
bool bDetaQ2Q0 = WELS_ABS (q2 - q0) < iBeta;
if (bDetaP2P0) {
const int32_t p3 = pPix[-4 * iStrideX];
pPix[-iStrideX] = (p2 + (p1 * (1 << 1)) + (p0 * (1 << 1)) + (q0 * (1 << 1)) + q1 + 4) >> 3; //p0
pPix[-2 * iStrideX] = (p2 + p1 + p0 + q0 + 2) >> 2; //p1
pPix[-3 * iStrideX] = ((p3 * (1 << 1)) + p2 + (p2 * (1 << 1)) + p1 + p0 + q0 + 4) >> 3; //p2
} else {
pPix[-1 * iStrideX] = ((p1 * (1 << 1)) + p0 + q1 + 2) >> 2; //p0
}
if (bDetaQ2Q0) {
const int32_t q3 = pPix[3 * iStrideX];
pPix[0] = (p1 + (p0 * (1 << 1)) + (q0 * (1 << 1)) + (q1 * (1 << 1)) + q2 + 4) >> 3; //q0
pPix[iStrideX] = (p0 + q0 + q1 + q2 + 2) >> 2; //q1
pPix[2 * iStrideX] = ((q3 * (1 << 1)) + q2 + (q2 * (1 << 1)) + q1 + q0 + p0 + 4) >> 3; //q2
} else {
pPix[0] = ((q1 * (1 << 1)) + q0 + p1 + 2) >> 2; //q0
}
} else {
// larger step: only p0 and q0 are rewritten
pPix[-iStrideX] = ((p1 * (1 << 1)) + p0 + q1 + 2) >> 2; //p0
pPix[ 0] = ((q1 * (1 << 1)) + q0 + p1 + 2) >> 2; //q0
}
}
pPix += iStrideY;
}
}
// "V" entry points: samples across the edge are iStride apart, successive lines 1 byte apart.
// "H" entry points: samples across the edge are 1 byte apart, successive lines iStride apart.
void DeblockLumaLt4V_c (uint8_t* pPix, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* tc) {
  const int32_t kiAcrossEdge = iStride;
  const int32_t kiAlongEdge = 1;
  DeblockLumaLt4_c (pPix, kiAcrossEdge, kiAlongEdge, iAlpha, iBeta, tc);
}
void DeblockLumaLt4H_c (uint8_t* pPix, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* tc) {
  const int32_t kiAcrossEdge = 1;
  const int32_t kiAlongEdge = iStride;
  DeblockLumaLt4_c (pPix, kiAcrossEdge, kiAlongEdge, iAlpha, iBeta, tc);
}
void DeblockLumaEq4V_c (uint8_t* pPix, int32_t iStride, int32_t iAlpha, int32_t iBeta) {
  const int32_t kiAcrossEdge = iStride;
  const int32_t kiAlongEdge = 1;
  DeblockLumaEq4_c (pPix, kiAcrossEdge, kiAlongEdge, iAlpha, iBeta);
}
void DeblockLumaEq4H_c (uint8_t* pPix, int32_t iStride, int32_t iAlpha, int32_t iBeta) {
  const int32_t kiAcrossEdge = 1;
  const int32_t kiAlongEdge = iStride;
  DeblockLumaEq4_c (pPix, kiAcrossEdge, kiAlongEdge, iAlpha, iBeta);
}
/*!
 * \brief   Filter 8 lines across the edge of both chroma planes (Cb first, then Cr).
 *          Only p0 and q0 are rewritten; p1/q1 are read for the threshold tests.
 * \param   pTc  clipping bounds, one entry per pair of lines; lines whose entry
 *               is not strictly positive are skipped
 */
void DeblockChromaLt4_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha,
                         int32_t iBeta, int8_t* pTc) {
  for (int32_t iLine = 0; iLine < 8; ++iLine, pPixCb += iStrideY, pPixCr += iStrideY) {
    const int32_t kiTc = pTc[iLine >> 1];
    if (kiTc <= 0)
      continue;
    uint8_t* const pPlanes[2] = { pPixCb, pPixCr };
    for (int32_t iPlane = 0; iPlane < 2; ++iPlane) {
      uint8_t* const pCur = pPlanes[iPlane];
      const int32_t p0 = pCur[-iStrideX];
      const int32_t p1 = pCur[-2 * iStrideX];
      const int32_t q0 = pCur[0];
      const int32_t q1 = pCur[iStrideX];
      const bool kbFilterLine = (WELS_ABS (p0 - q0) < iAlpha) && (WELS_ABS (p1 - p0) < iBeta)
                                && (WELS_ABS (q1 - q0) < iBeta);
      if (!kbFilterLine)
        continue;
      const int32_t kiDelta = WELS_CLIP3 ((((q0 - p0) * (1 << 2)) + (p1 - q1) + 4) >> 3, -kiTc, kiTc);
      pCur[-iStrideX] = WelsClip1 (p0 + kiDelta); /* p0' */
      pCur[0] = WelsClip1 (q0 - kiDelta); /* q0' */
    }
  }
}
/*!
 * \brief   Filter 8 lines across the edge of both chroma planes (Cb first, then Cr)
 *          without tc clipping. Only p0 and q0 are rewritten, and only when all
 *          three alpha/beta threshold tests pass for that line.
 */
void DeblockChromaEq4_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha,
                         int32_t iBeta) {
  for (int32_t iLine = 0; iLine < 8; ++iLine) {
    uint8_t* const pPlanes[2] = { pPixCb, pPixCr };
    for (int32_t iPlane = 0; iPlane < 2; ++iPlane) {
      uint8_t* const pCur = pPlanes[iPlane];
      const int32_t p0 = pCur[-iStrideX];
      const int32_t p1 = pCur[-2 * iStrideX];
      const int32_t q0 = pCur[0];
      const int32_t q1 = pCur[iStrideX];
      const bool kbFilterLine = (WELS_ABS (p0 - q0) < iAlpha) && (WELS_ABS (p1 - p0) < iBeta)
                                && (WELS_ABS (q1 - q0) < iBeta);
      if (kbFilterLine) {
        pCur[-iStrideX] = ((p1 * (1 << 1)) + p0 + q1 + 2) >> 2; /* p0' */
        pCur[0] = ((q1 * (1 << 1)) + q0 + p1 + 2) >> 2; /* q0' */
      }
    }
    pPixCr += iStrideY;
    pPixCb += iStrideY;
  }
}
// "V" entry points: samples across the edge are iStride apart, successive lines 1 byte apart.
// "H" entry points: samples across the edge are 1 byte apart, successive lines iStride apart.
void DeblockChromaLt4V_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
                          int8_t* tc) {
  const int32_t kiAcrossEdge = iStride;
  const int32_t kiAlongEdge = 1;
  DeblockChromaLt4_c (pPixCb, pPixCr, kiAcrossEdge, kiAlongEdge, iAlpha, iBeta, tc);
}
void DeblockChromaLt4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
                          int8_t* tc) {
  const int32_t kiAcrossEdge = 1;
  const int32_t kiAlongEdge = iStride;
  DeblockChromaLt4_c (pPixCb, pPixCr, kiAcrossEdge, kiAlongEdge, iAlpha, iBeta, tc);
}
void DeblockChromaEq4V_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta) {
  const int32_t kiAcrossEdge = iStride;
  const int32_t kiAlongEdge = 1;
  DeblockChromaEq4_c (pPixCb, pPixCr, kiAcrossEdge, kiAlongEdge, iAlpha, iBeta);
}
void DeblockChromaEq4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta) {
  const int32_t kiAcrossEdge = 1;
  const int32_t kiAlongEdge = iStride;
  DeblockChromaEq4_c (pPixCb, pPixCr, kiAcrossEdge, kiAlongEdge, iAlpha, iBeta);
}
/*!
 * \brief   Single-plane variant of DeblockChromaLt4_c: filter 8 lines across the
 *          edge at pPixCbCr, rewriting p0 and q0 only.
 * \param   pTc  clipping bounds, one entry per pair of lines; lines whose entry
 *               is not strictly positive are skipped
 */
void DeblockChromaLt42_c (uint8_t* pPixCbCr, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha,
                          int32_t iBeta, int8_t* pTc) {
  for (int32_t iLine = 0; iLine < 8; ++iLine, pPixCbCr += iStrideY) {
    const int32_t kiTc = pTc[iLine >> 1];
    if (kiTc <= 0)
      continue;
    const int32_t p0 = pPixCbCr[-iStrideX];
    const int32_t p1 = pPixCbCr[-2 * iStrideX];
    const int32_t q0 = pPixCbCr[0];
    const int32_t q1 = pPixCbCr[iStrideX];
    const bool kbFilterLine = (WELS_ABS (p0 - q0) < iAlpha) && (WELS_ABS (p1 - p0) < iBeta)
                              && (WELS_ABS (q1 - q0) < iBeta);
    if (!kbFilterLine)
      continue;
    const int32_t kiDelta = WELS_CLIP3 ((((q0 - p0) * (1 << 2)) + (p1 - q1) + 4) >> 3, -kiTc, kiTc);
    pPixCbCr[-iStrideX] = WelsClip1 (p0 + kiDelta); /* p0' */
    pPixCbCr[0] = WelsClip1 (q0 - kiDelta); /* q0' */
  }
}
/*!
 * \brief   Single-plane variant of DeblockChromaEq4_c: filter 8 lines across the
 *          edge at pPixCbCr without tc clipping, rewriting p0 and q0 only.
 */
void DeblockChromaEq42_c (uint8_t* pPixCbCr, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha,
                          int32_t iBeta) {
  for (int32_t iLine = 0; iLine < 8; ++iLine, pPixCbCr += iStrideY) {
    const int32_t p0 = pPixCbCr[-iStrideX];
    const int32_t p1 = pPixCbCr[-2 * iStrideX];
    const int32_t q0 = pPixCbCr[0];
    const int32_t q1 = pPixCbCr[iStrideX];
    const bool kbFilterLine = (WELS_ABS (p0 - q0) < iAlpha) && (WELS_ABS (p1 - p0) < iBeta)
                              && (WELS_ABS (q1 - q0) < iBeta);
    if (!kbFilterLine)
      continue;
    pPixCbCr[-iStrideX] = ((p1 * (1 << 1)) + p0 + q1 + 2) >> 2; /* p0' */
    pPixCbCr[0] = ((q1 * (1 << 1)) + q0 + p1 + 2) >> 2; /* q0' */
  }
}
// "V" entry points: samples across the edge are iStride apart, successive lines 1 byte apart.
// "H" entry points: samples across the edge are 1 byte apart, successive lines iStride apart.
void DeblockChromaLt4V2_c (uint8_t* pPixCbCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
                           int8_t* tc) {
  const int32_t kiAcrossEdge = iStride;
  const int32_t kiAlongEdge = 1;
  DeblockChromaLt42_c (pPixCbCr, kiAcrossEdge, kiAlongEdge, iAlpha, iBeta, tc);
}
void DeblockChromaLt4H2_c (uint8_t* pPixCbCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
                           int8_t* tc) {
  const int32_t kiAcrossEdge = 1;
  const int32_t kiAlongEdge = iStride;
  DeblockChromaLt42_c (pPixCbCr, kiAcrossEdge, kiAlongEdge, iAlpha, iBeta, tc);
}
void DeblockChromaEq4V2_c (uint8_t* pPixCbCr, int32_t iStride, int32_t iAlpha, int32_t iBeta) {
  const int32_t kiAcrossEdge = iStride;
  const int32_t kiAlongEdge = 1;
  DeblockChromaEq42_c (pPixCbCr, kiAcrossEdge, kiAlongEdge, iAlpha, iBeta);
}
void DeblockChromaEq4H2_c (uint8_t* pPixCbCr, int32_t iStride, int32_t iAlpha, int32_t iBeta) {
  const int32_t kiAcrossEdge = 1;
  const int32_t kiAlongEdge = iStride;
  DeblockChromaEq42_c (pPixCbCr, kiAcrossEdge, kiAlongEdge, iAlpha, iBeta);
}
// Collapse each of the 24 entries of pNonZeroCount to a flag: 1 when non-zero, 0 otherwise.
void WelsNonZeroCount_c (int8_t* pNonZeroCount) {
  int8_t* const pEnd = pNonZeroCount + 24;
  for (int8_t* pEntry = pNonZeroCount; pEntry != pEnd; ++pEntry) {
    *pEntry = (*pEntry != 0) ? 1 : 0;
  }
}
#ifdef X86_ASM
// "H" luma filters for x86 built on top of the "V" SSSE3 kernels.
// Both functions follow the same three steps: call DeblockLumaTransposeH2V_sse2 on
// the pixels starting 4 bytes before pPixY to fill uiBuf, run the V kernel on uiBuf
// with a stride of 16 starting at offset 4 * 16, then call DeblockLumaTransposeV2H_sse2
// to write uiBuf back to the same picture location.
// NOTE(review): ENFORCE_STACK_ALIGN_1D presumably declares uiBuf as a 16-byte-aligned
// stack buffer of 16 * 8 bytes, and the transpose helpers presumably swap rows and
// columns as their names suggest - neither is defined in this chunk, please confirm.
extern "C" {
void DeblockLumaLt4H_ssse3 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc) {
ENFORCE_STACK_ALIGN_1D (uint8_t, uiBuf, 16 * 8, 16);
DeblockLumaTransposeH2V_sse2 (pPixY - 4, iStride, &uiBuf[0]);
DeblockLumaLt4V_ssse3 (&uiBuf[4 * 16], 16, iAlpha, iBeta, pTc);
DeblockLumaTransposeV2H_sse2 (pPixY - 4, iStride, &uiBuf[0]);
}
void DeblockLumaEq4H_ssse3 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta) {
ENFORCE_STACK_ALIGN_1D (uint8_t, uiBuf, 16 * 8, 16);
DeblockLumaTransposeH2V_sse2 (pPixY - 4, iStride, &uiBuf[0]);
DeblockLumaEq4V_ssse3 (&uiBuf[4 * 16], 16, iAlpha, iBeta);
DeblockLumaTransposeV2H_sse2 (pPixY - 4, iStride, &uiBuf[0]);
}
}
#endif
#ifdef HAVE_MMI
// "H" luma filters for MMI built on top of the "V" MMI kernels.
// Both functions follow the same three steps: call DeblockLumaTransposeH2V_mmi on
// the pixels starting 4 bytes before pPixY to fill uiBuf, run the V kernel on uiBuf
// with a stride of 16 starting at offset 4 * 16, then call DeblockLumaTransposeV2H_mmi
// to write uiBuf back to the same picture location.
// NOTE(review): ENFORCE_STACK_ALIGN_1D presumably declares uiBuf as a 16-byte-aligned
// stack buffer of 16 * 8 bytes, and the transpose helpers presumably swap rows and
// columns as their names suggest - neither is defined in this chunk, please confirm.
extern "C" {
void DeblockLumaLt4H_mmi (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc) {
ENFORCE_STACK_ALIGN_1D (uint8_t, uiBuf, 16 * 8, 16);
DeblockLumaTransposeH2V_mmi (pPixY - 4, iStride, &uiBuf[0]);
DeblockLumaLt4V_mmi (&uiBuf[4 * 16], 16, iAlpha, iBeta, pTc);
DeblockLumaTransposeV2H_mmi (pPixY - 4, iStride, &uiBuf[0]);
}
void DeblockLumaEq4H_mmi (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta) {
ENFORCE_STACK_ALIGN_1D (uint8_t, uiBuf, 16 * 8, 16);
DeblockLumaTransposeH2V_mmi (pPixY - 4, iStride, &uiBuf[0]);
DeblockLumaEq4V_mmi (&uiBuf[4 * 16], 16, iAlpha, iBeta);
DeblockLumaTransposeV2H_mmi (pPixY - 4, iStride, &uiBuf[0]);
}
}
#endif//HAVE_MMI

View File

@ -0,0 +1,415 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <string.h>
#include "expand_pic.h"
#include "cpu_core.h"
// Top-left corner: replicate the first 16 pixels of the picture's first row into the
// PADDING_LENGTH rows above it, and fill the PADDING_LENGTH bytes left of each of
// those rows with the top-left pixel.
static inline void MBPadTopLeftLuma_c (uint8_t*& pDst, const int32_t& kiStride) {
  const uint8_t kuiCorner = pDst[0];
  uint8_t* pRow = pDst;
  int32_t iRowsLeft = PADDING_LENGTH;
  do {
    pRow -= kiStride;
    memcpy (pRow, pDst, 16); // confirmed_safe_unsafe_usage
    memset (pRow - PADDING_LENGTH, kuiCorner, PADDING_LENGTH);
  } while (--iRowsLeft > 0);
}
// Top border: replicate the 16 first-row pixels of macroblock column kiMbX into the
// PADDING_LENGTH rows above the picture.
static inline void MBPadTopLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbX) {
  uint8_t* const pSrcRow = pDst + (kiMbX << 4);
  uint8_t* pRow = pSrcRow;
  int32_t iRowsLeft = PADDING_LENGTH;
  do {
    pRow -= kiStride;
    memcpy (pRow, pSrcRow, 16); // confirmed_safe_unsafe_usage
  } while (--iRowsLeft > 0);
}
// Bottom border: replicate the 16 last-row pixels of macroblock column kiMbX into the
// PADDING_LENGTH rows below the picture.
static inline void MBPadBottomLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbX,
                                      const int32_t& kiPicH) {
  uint8_t* const pSrcRow = pDst + (kiPicH - 1) * kiStride + (kiMbX << 4);
  uint8_t* pRow = pSrcRow;
  int32_t iRowsLeft = PADDING_LENGTH;
  do {
    pRow += kiStride;
    memcpy (pRow, pSrcRow, 16); // confirmed_safe_unsafe_usage
  } while (--iRowsLeft > 0);
}
// Top-right corner: replicate the last 16 pixels of the first row into the
// PADDING_LENGTH rows above it, and fill the PADDING_LENGTH bytes right of each of
// those rows with the top-right pixel.
static inline void MBPadTopRightLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW) {
  uint8_t* const pRowEnd = pDst + kiPicW;
  const uint8_t kuiCorner = pRowEnd[-1];
  uint8_t* pRow = pRowEnd;
  int32_t iRowsLeft = PADDING_LENGTH;
  do {
    pRow -= kiStride;
    memcpy (pRow - 16, pRowEnd - 16, 16); // confirmed_safe_unsafe_usage
    memset (pRow, kuiCorner, PADDING_LENGTH);
  } while (--iRowsLeft > 0);
}
// Bottom-left corner: replicate the first 16 pixels of the last row into the
// PADDING_LENGTH rows below it, and fill the PADDING_LENGTH bytes left of each of
// those rows with the bottom-left pixel.
static inline void MBPadBottomLeftLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicH) {
  uint8_t* const pLastRow = pDst + (kiPicH - 1) * kiStride;
  const uint8_t kuiCorner = pLastRow[0];
  uint8_t* pRow = pLastRow;
  int32_t iRowsLeft = PADDING_LENGTH;
  do {
    pRow += kiStride;
    memcpy (pRow, pLastRow, 16); // confirmed_safe_unsafe_usage
    memset (pRow - PADDING_LENGTH, kuiCorner, PADDING_LENGTH);
  } while (--iRowsLeft > 0);
}
// Bottom-right corner: replicate the last 16 pixels of the last row into the
// PADDING_LENGTH rows below it, and fill the PADDING_LENGTH bytes right of each of
// those rows with the bottom-right pixel.
static inline void MBPadBottomRightLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW,
    const int32_t& kiPicH) {
  uint8_t* const pLastRowEnd = pDst + (kiPicH - 1) * kiStride + kiPicW;
  const uint8_t kuiCorner = pLastRowEnd[-1];
  uint8_t* pRow = pLastRowEnd;
  int32_t iRowsLeft = PADDING_LENGTH;
  do {
    pRow += kiStride;
    memcpy (pRow - 16, pLastRowEnd - 16, 16); // confirmed_safe_unsafe_usage
    memset (pRow, kuiCorner, PADDING_LENGTH);
  } while (--iRowsLeft > 0);
}
// Left border: for the 16 rows of macroblock row kiMbY, fill the PADDING_LENGTH bytes
// left of the row with the row's first pixel.
static inline void MBPadLeftLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbY) {
  uint8_t* pRow = pDst + (kiMbY << 4) * kiStride;
  int32_t iRowsLeft = 16;
  while (iRowsLeft-- > 0) {
    memset (pRow - PADDING_LENGTH, *pRow, PADDING_LENGTH);
    pRow += kiStride;
  }
}
// Right border: for the 16 rows of macroblock row kiMbY, fill the PADDING_LENGTH bytes
// right of the row with the row's last pixel.
static inline void MBPadRightLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbY,
                                     const int32_t& kiPicW) {
  uint8_t* pRowEnd = pDst + (kiMbY << 4) * kiStride + kiPicW;
  int32_t iRowsLeft = 16;
  while (iRowsLeft-- > 0) {
    memset (pRowEnd, pRowEnd[-1], PADDING_LENGTH);
    pRowEnd += kiStride;
  }
}
// Top border (chroma): replicate the 8 first-row pixels of macroblock column kiMbX
// into the CHROMA_PADDING_LENGTH rows above the plane.
static inline void MBPadTopChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbX) {
  uint8_t* const pSrcRow = pDst + (kiMbX << 3);
  uint8_t* pRow = pSrcRow;
  int32_t iRowsLeft = CHROMA_PADDING_LENGTH;
  do {
    pRow -= kiStride;
    memcpy (pRow, pSrcRow, 8); // confirmed_safe_unsafe_usage
  } while (--iRowsLeft > 0);
}
// Bottom border (chroma): replicate the 8 last-row pixels of macroblock column kiMbX
// into the CHROMA_PADDING_LENGTH rows below the plane.
static inline void MBPadBottomChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbX,
                                        const int32_t& kiPicH) {
  uint8_t* const pSrcRow = pDst + (kiPicH - 1) * kiStride + (kiMbX << 3);
  uint8_t* pRow = pSrcRow;
  int32_t iRowsLeft = CHROMA_PADDING_LENGTH;
  do {
    pRow += kiStride;
    memcpy (pRow, pSrcRow, 8); // confirmed_safe_unsafe_usage
  } while (--iRowsLeft > 0);
}
// Top-left corner (chroma): replicate the first 8 pixels of the first row into the
// CHROMA_PADDING_LENGTH rows above it, and fill the CHROMA_PADDING_LENGTH bytes left
// of each of those rows with the top-left pixel.
static inline void MBPadTopLeftChroma_c (uint8_t*& pDst, const int32_t& kiStride) {
  const uint8_t kuiCorner = pDst[0];
  uint8_t* pRow = pDst;
  int32_t iRowsLeft = CHROMA_PADDING_LENGTH;
  do {
    pRow -= kiStride;
    memcpy (pRow, pDst, 8); // confirmed_safe_unsafe_usage
    memset (pRow - CHROMA_PADDING_LENGTH, kuiCorner, CHROMA_PADDING_LENGTH);
  } while (--iRowsLeft > 0);
}
// Top-right corner (chroma): replicate the last 8 pixels of the first row into the
// CHROMA_PADDING_LENGTH rows above it, and fill the CHROMA_PADDING_LENGTH bytes right
// of each of those rows with the top-right pixel.
static inline void MBPadTopRightChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW) {
  uint8_t* const pRowEnd = pDst + kiPicW;
  const uint8_t kuiCorner = pRowEnd[-1];
  uint8_t* pRow = pRowEnd;
  int32_t iRowsLeft = CHROMA_PADDING_LENGTH;
  do {
    pRow -= kiStride;
    memcpy (pRow - 8, pRowEnd - 8, 8); // confirmed_safe_unsafe_usage
    memset (pRow, kuiCorner, CHROMA_PADDING_LENGTH);
  } while (--iRowsLeft > 0);
}
// Bottom-left corner (chroma): replicate the first 8 pixels of the last row into the
// CHROMA_PADDING_LENGTH rows below it, and fill the CHROMA_PADDING_LENGTH bytes left
// of each of those rows with the bottom-left pixel.
static inline void MBPadBottomLeftChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicH) {
  uint8_t* const pLastRow = pDst + (kiPicH - 1) * kiStride;
  const uint8_t kuiCorner = pLastRow[0];
  uint8_t* pRow = pLastRow;
  int32_t iRowsLeft = CHROMA_PADDING_LENGTH;
  do {
    pRow += kiStride;
    memcpy (pRow, pLastRow, 8); // confirmed_safe_unsafe_usage
    memset (pRow - CHROMA_PADDING_LENGTH, kuiCorner, CHROMA_PADDING_LENGTH);
  } while (--iRowsLeft > 0);
}
// Bottom-right corner (chroma): replicate the last 8 pixels of the last row into the
// CHROMA_PADDING_LENGTH rows below it, and fill the CHROMA_PADDING_LENGTH bytes right
// of each of those rows with the bottom-right pixel.
static inline void MBPadBottomRightChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW,
    const int32_t kiPicH) {
  uint8_t* const pLastRowEnd = pDst + (kiPicH - 1) * kiStride + kiPicW;
  const uint8_t kuiCorner = pLastRowEnd[-1];
  uint8_t* pRow = pLastRowEnd;
  int32_t iRowsLeft = CHROMA_PADDING_LENGTH;
  do {
    pRow += kiStride;
    memcpy (pRow - 8, pLastRowEnd - 8, 8); // confirmed_safe_unsafe_usage
    memset (pRow, kuiCorner, CHROMA_PADDING_LENGTH);
  } while (--iRowsLeft > 0);
}
// Left border (chroma): for the 8 rows of macroblock row kiMbY, fill the
// CHROMA_PADDING_LENGTH bytes left of the row with the row's first pixel.
static inline void MBPadLeftChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbY) {
  uint8_t* pRow = pDst + (kiMbY << 3) * kiStride;
  int32_t iRowsLeft = 8;
  while (iRowsLeft-- > 0) {
    memset (pRow - CHROMA_PADDING_LENGTH, *pRow, CHROMA_PADDING_LENGTH);
    pRow += kiStride;
  }
}
// Right border (chroma): for the 8 rows of macroblock row kiMbY, fill the
// CHROMA_PADDING_LENGTH bytes right of the row with the row's last pixel.
static inline void MBPadRightChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbY,
                                       const int32_t& kiPicW) {
  uint8_t* pRowEnd = pDst + (kiMbY << 3) * kiStride + kiPicW;
  int32_t iRowsLeft = 8;
  while (iRowsLeft-- > 0) {
    memset (pRowEnd, pRowEnd[-1], CHROMA_PADDING_LENGTH);
    pRowEnd += kiStride;
  }
}
/*!
 * \brief   Pad the luma border next to macroblock (kiMbX, kiMbY), if that
 *          macroblock touches the picture boundary.
 *          Three independent steps: at most one corner, then the left or right
 *          side, then the top or bottom side for non-corner columns.
 * \param   kiMBWidth, kiMBHeight  picture size in macroblocks
 */
void PadMBLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW, const int32_t& kiPicH,
                  const int32_t& kiMbX, const int32_t& kiMbY, const int32_t& kiMBWidth, const int32_t& kiMBHeight) {
  const bool kbLeftCol = (kiMbX == 0);
  const bool kbRightCol = (kiMbX == kiMBWidth - 1);
  const bool kbTopRow = (kiMbY == 0);
  const bool kbBottomRow = (kiMbY == kiMBHeight - 1);
  const bool kbInnerCol = (kiMbX > 0) && (kiMbX < kiMBWidth - 1);

  // corners (first match wins)
  if (kbLeftCol && kbTopRow) {
    MBPadTopLeftLuma_c (pDst, kiStride);
  } else if (kbTopRow && kbRightCol) {
    MBPadTopRightLuma_c (pDst, kiStride, kiPicW);
  } else if (kbBottomRow && kbLeftCol) {
    MBPadBottomLeftLuma_c (pDst, kiStride, kiPicH);
  } else if (kbBottomRow && kbRightCol) {
    MBPadBottomRightLuma_c (pDst, kiStride, kiPicW, kiPicH);
  }

  // left / right sides
  if (kbLeftCol) {
    MBPadLeftLuma_c (pDst, kiStride, kiMbY);
  } else if (kbRightCol) {
    MBPadRightLuma_c (pDst, kiStride, kiMbY, kiPicW);
  }

  // top / bottom sides, corner columns excluded
  if (kbInnerCol) {
    if (kbTopRow) {
      MBPadTopLuma_c (pDst, kiStride, kiMbX);
    } else if (kbBottomRow) {
      MBPadBottomLuma_c (pDst, kiStride, kiMbX, kiPicH);
    }
  }
}
/*!
 * \brief   Pad the chroma border next to macroblock (kiMbX, kiMbY), if that
 *          macroblock touches the plane boundary.
 *          Three independent steps: at most one corner, then the left or right
 *          side, then the top or bottom side for non-corner columns.
 * \param   kiMBWidth, kiMBHeight  picture size in macroblocks
 */
void PadMBChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW, const int32_t& kiPicH,
                    const int32_t& kiMbX, const int32_t& kiMbY, const int32_t& kiMBWidth, const int32_t& kiMBHeight) {
  const bool kbLeftCol = (kiMbX == 0);
  const bool kbRightCol = (kiMbX == kiMBWidth - 1);
  const bool kbTopRow = (kiMbY == 0);
  const bool kbBottomRow = (kiMbY == kiMBHeight - 1);
  const bool kbInnerCol = (kiMbX > 0) && (kiMbX < kiMBWidth - 1);

  // corners (first match wins)
  if (kbLeftCol && kbTopRow) {
    MBPadTopLeftChroma_c (pDst, kiStride);
  } else if (kbTopRow && kbRightCol) {
    MBPadTopRightChroma_c (pDst, kiStride, kiPicW);
  } else if (kbBottomRow && kbLeftCol) {
    MBPadBottomLeftChroma_c (pDst, kiStride, kiPicH);
  } else if (kbBottomRow && kbRightCol) {
    MBPadBottomRightChroma_c (pDst, kiStride, kiPicW, kiPicH);
  }

  // left / right sides
  if (kbLeftCol) {
    MBPadLeftChroma_c (pDst, kiStride, kiMbY);
  } else if (kbRightCol) {
    MBPadRightChroma_c (pDst, kiStride, kiMbY, kiPicW);
  }

  // top / bottom sides, corner columns excluded
  if (kbInnerCol) {
    if (kbTopRow) {
      MBPadTopChroma_c (pDst, kiStride, kiMbX);
    } else if (kbBottomRow) {
      MBPadBottomChroma_c (pDst, kiStride, kiMbX, kiPicH);
    }
  }
}
// Rewritten (split into separate luma and chroma routines) to make MMX/SSE2 optimization easier, 9/27/2009
/*!
 * \brief   Extend a luma plane by PADDING_LENGTH pixels on every side.
 *          First the top and bottom rows are replicated outwards (corner areas get
 *          the respective corner pixel), then each picture row is extended left and
 *          right with its first and last pixel.
 * \param   pDst      top-left pixel of the picture area inside the padded buffer
 * \param   kiStride  distance in bytes between two rows
 * \param   kiPicW    picture width in pixels
 * \param   kiPicH    picture height in rows
 */
static inline void ExpandPictureLuma_c (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW,
                                        const int32_t kiPicH) {
  const int32_t kiPad = PADDING_LENGTH;
  uint8_t* const pFirstRow = pDst;
  uint8_t* const pLastRow = pDst + (kiPicH - 1) * kiStride;
  const uint8_t kuiTL = pFirstRow[0];
  const uint8_t kuiTR = pFirstRow[kiPicW - 1];
  const uint8_t kuiBL = pLastRow[0];
  const uint8_t kuiBR = pLastRow[kiPicW - 1];

  // rows above and below the picture, corners included
  uint8_t* pAbove = pFirstRow;
  uint8_t* pBelow = pLastRow;
  int32_t iRowsLeft = kiPad;
  do {
    pAbove -= kiStride;
    pBelow += kiStride;
    memcpy (pAbove, pFirstRow, kiPicW); // confirmed_safe_unsafe_usage
    memcpy (pBelow, pLastRow, kiPicW); // confirmed_safe_unsafe_usage
    memset (pAbove - kiPad, kuiTL, kiPad); // top left
    memset (pAbove + kiPicW, kuiTR, kiPad); // top right
    memset (pBelow - kiPad, kuiBL, kiPad); // bottom left
    memset (pBelow + kiPicW, kuiBR, kiPad); // bottom right
  } while (--iRowsLeft > 0);

  // left and right of every picture row
  uint8_t* pRow = pFirstRow;
  int32_t iLinesLeft = kiPicH;
  do {
    memset (pRow - kiPad, pRow[0], kiPad);
    memset (pRow + kiPicW, pRow[kiPicW - 1], kiPad);
    pRow += kiStride;
  } while (--iLinesLeft > 0);
}
/*!
 * \brief   Extend a chroma plane by (PADDING_LENGTH >> 1) pixels on every side.
 *          First the top and bottom rows are replicated outwards (corner areas get
 *          the respective corner pixel), then each plane row is extended left and
 *          right with its first and last pixel.
 * \param   pDst      top-left pixel of the plane area inside the padded buffer
 * \param   kiStride  distance in bytes between two rows
 * \param   kiPicW    plane width in pixels
 * \param   kiPicH    plane height in rows
 */
static inline void ExpandPictureChroma_c (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW,
    const int32_t kiPicH) {
  const int32_t kiPad = (PADDING_LENGTH >> 1);
  uint8_t* const pFirstRow = pDst;
  uint8_t* const pLastRow = pDst + (kiPicH - 1) * kiStride;
  const uint8_t kuiTL = pFirstRow[0];
  const uint8_t kuiTR = pFirstRow[kiPicW - 1];
  const uint8_t kuiBL = pLastRow[0];
  const uint8_t kuiBR = pLastRow[kiPicW - 1];

  // rows above and below the plane, corners included
  uint8_t* pAbove = pFirstRow;
  uint8_t* pBelow = pLastRow;
  int32_t iRowsLeft = kiPad;
  do {
    pAbove -= kiStride;
    pBelow += kiStride;
    memcpy (pAbove, pFirstRow, kiPicW); // confirmed_safe_unsafe_usage
    memcpy (pBelow, pLastRow, kiPicW); // confirmed_safe_unsafe_usage
    memset (pAbove - kiPad, kuiTL, kiPad); // top left
    memset (pAbove + kiPicW, kuiTR, kiPad); // top right
    memset (pBelow - kiPad, kuiBL, kiPad); // bottom left
    memset (pBelow + kiPicW, kuiBR, kiPad); // bottom right
  } while (--iRowsLeft > 0);

  // left and right of every plane row
  uint8_t* pRow = pFirstRow;
  int32_t iLinesLeft = kiPicH;
  do {
    memset (pRow - kiPad, pRow[0], kiPad);
    memset (pRow + kiPicW, pRow[kiPicW - 1], kiPad);
    pRow += kiStride;
  } while (--iLinesLeft > 0);
}
// Fill the picture-expansion function table in *pExpandPicFunc.
// The C implementations are installed first; each conditionally compiled section
// below then overwrites all three entries when its CPU feature bit is set in
// kuiCPUFlag, so the last matching section wins.
// pfExpandChromaPicture has two slots: ExpandReferencingPicture selects slot [1]
// when the chroma width is a multiple of 16 and slot [0] otherwise.
void InitExpandPictureFunc (SExpandPicFunc* pExpandPicFunc, const uint32_t kuiCPUFlag) {
pExpandPicFunc->pfExpandLumaPicture = ExpandPictureLuma_c;
pExpandPicFunc->pfExpandChromaPicture[0] = ExpandPictureChroma_c;
pExpandPicFunc->pfExpandChromaPicture[1] = ExpandPictureChroma_c;
#if defined(X86_ASM)
if ((kuiCPUFlag & WELS_CPU_SSE2) == WELS_CPU_SSE2) {
pExpandPicFunc->pfExpandLumaPicture = ExpandPictureLuma_sse2;
// slot [0] gets the "Unalign" routine, slot [1] the "Align" routine
pExpandPicFunc->pfExpandChromaPicture[0] = ExpandPictureChromaUnalign_sse2;
pExpandPicFunc->pfExpandChromaPicture[1] = ExpandPictureChromaAlign_sse2;
}
#endif//X86_ASM
#if defined(HAVE_NEON)
if (kuiCPUFlag & WELS_CPU_NEON) {
pExpandPicFunc->pfExpandLumaPicture = ExpandPictureLuma_neon;
pExpandPicFunc->pfExpandChromaPicture[0] = ExpandPictureChroma_neon;
pExpandPicFunc->pfExpandChromaPicture[1] = ExpandPictureChroma_neon;
}
#endif//HAVE_NEON
#if defined(HAVE_NEON_AARCH64)
if (kuiCPUFlag & WELS_CPU_NEON) {
pExpandPicFunc->pfExpandLumaPicture = ExpandPictureLuma_AArch64_neon;
pExpandPicFunc->pfExpandChromaPicture[0] = ExpandPictureChroma_AArch64_neon;
pExpandPicFunc->pfExpandChromaPicture[1] = ExpandPictureChroma_AArch64_neon;
}
#endif//HAVE_NEON_AARCH64
#if defined(HAVE_MMI)
if (kuiCPUFlag & WELS_CPU_MMI) {
pExpandPicFunc->pfExpandLumaPicture = ExpandPictureLuma_mmi;
// slot [0] gets the "Unalign" routine, slot [1] the "Align" routine
pExpandPicFunc->pfExpandChromaPicture[0] = ExpandPictureChromaUnalign_mmi;
pExpandPicFunc->pfExpandChromaPicture[1] = ExpandPictureChromaAlign_mmi;
}
#endif//HAVE_MMI
}
//void ExpandReferencingPicture (SPicture* pPic, PExpandPictureFunc pExpLuma, PExpandPictureFunc pExpChrom[2]) {
/*!
 * \brief   Expand the borders of all three planes of a picture.
 * \param   pData      plane pointers: [0] luma, [1] Cb, [2] Cr
 * \param   iWidth     luma width; chroma planes use iWidth >> 1
 * \param   iHeight    luma height; chroma planes use iHeight >> 1
 * \param   iStride    per-plane strides, indexed like pData
 * \param   pExpLuma   routine applied to the luma plane
 * \param   pExpChrom  chroma routines: [1] is used when the chroma width is a
 *                     multiple of 16, [0] otherwise; chroma planes narrower than
 *                     16 pixels always go through ExpandPictureChroma_c
 */
void ExpandReferencingPicture (uint8_t* pData[3], int32_t iWidth, int32_t iHeight, int32_t iStride[3],
                               PExpandPictureFunc pExpLuma, PExpandPictureFunc pExpChrom[2]) {
  uint8_t* pLuma = pData[0];
  uint8_t* pCb = pData[1];
  uint8_t* pCr = pData[2];
  const int32_t kiChromaW = iWidth >> 1;
  const int32_t kiChromaH = iHeight >> 1;

  pExpLuma (pLuma, iStride[0], iWidth, iHeight);

  if (kiChromaW < 16) {
    // too narrow for the optimized routines
    ExpandPictureChroma_c (pCb, iStride[1], kiChromaW, kiChromaH);
    ExpandPictureChroma_c (pCr, iStride[2], kiChromaW, kiChromaH);
    return;
  }

  const int32_t kiVariant = ((kiChromaW & 0x0F) == 0) ? 1 : 0;
  pExpChrom[kiVariant] (pCb, iStride[1], kiChromaW, kiChromaH);
  pExpChrom[kiVariant] (pCr, iStride[2], kiChromaW, kiChromaH);
}

View File

@ -0,0 +1,77 @@
/*!
* \copy
* Copyright (c) 2009-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file get_intra_predictor.c
*
* \brief implementation for get intra predictor about 16x16, 4x4, chroma.
*
* \date 4/2/2009 Created
* 9/14/2009 C level based optimization with high performance gained.
* [const, using ST32/ST64 to replace memset, memcpy and memmove etc.]
*
*************************************************************************************
*/
#include "ls_defines.h"
#include "cpu_core.h"
#include "intra_pred_common.h"
// 16x16 vertical prediction: the 16 bytes of the row just above pRef (pRef - kiStride)
// are copied into each of the 16 rows of pPred (rows are 16 bytes apart).
void WelsI16x16LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
  const int8_t* kpAbove = (int8_t*)&pRef[-kiStride];
  const uint64_t kuiLeftHalf = LD64 (kpAbove);
  const uint64_t kuiRightHalf = LD64 (kpAbove + 8);
  for (int32_t iRow = 0; iRow < 16; ++iRow) {
    uint8_t* pLine = pPred + (iRow << 4);
    ST64 (pLine, kuiLeftHalf);
    ST64 (pLine + 8, kuiRightHalf);
  }
}
// 16x16 horizontal prediction: every row of pPred (rows are 16 bytes apart) is filled
// with the pixel directly left of the matching pRef row. Rows are written from 15 down to 0.
void WelsI16x16LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
  for (int32_t iRow = 15; iRow >= 0; --iRow) {
    const uint8_t kuiLeft = pRef[iRow * kiStride - 1];
    // broadcast the byte into all 8 lanes of a 64-bit word
    const uint64_t kuiFill = (uint64_t) (0x0101010101010101ULL * kuiLeft);
    uint8_t* pLine = &pPred[iRow << 4];
    ST64 (pLine, kuiFill);
    ST64 (pLine + 8, kuiFill);
  }
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,174 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <stdlib.h>
#include <string.h>
#include "memory_align.h"
#include "macros.h"
namespace WelsCommon {
// State for the optional allocation log (compiled only with MEMORY_CHECK).
#ifdef MEMORY_CHECK
// log file; opened on first use by WelsMalloc ("./enc_mem_check_point.txt", append mode)
static FILE* fpMemChkPoint;
// running number printed with, and incremented by, every logged WelsMalloc call
static uint32_t nCountRequestNum;
// byte count printed by WelsFree's tagged log line
static int32_t g_iMemoryLength;
#endif
// Store the alignment used for later allocations. A cache line size of 0, or one
// that is not a multiple of 16, is replaced by 16.
CMemoryAlign::CMemoryAlign (const uint32_t kuiCacheLineSize)
#ifdef MEMORY_MONITOR
  : m_nMemoryUsageInBytes (0)
#endif//MEMORY_MONITOR
{
  const bool kbUsable = (kuiCacheLineSize != 0) && ((kuiCacheLineSize & 0x0f) == 0);
  m_nCacheLineSize = kbUsable ? kuiCacheLineSize : 0x10;
}
// With MEMORY_MONITOR enabled, assert that the usage counter is back to zero on destruction.
CMemoryAlign::~CMemoryAlign() {
#ifdef MEMORY_MONITOR
  assert (0 == m_nMemoryUsageInBytes);
#endif//MEMORY_MONITOR
}
/*!
 * \brief   Allocate kuiSize bytes aligned to kiAlign.
 *
 * The underlying malloc() block is laid out as
 *   [ slack | int32_t payload size | void* raw block | aligned payload ... ]
 * so that the raw pointer sits directly in front of the returned address
 * (WelsFree reads it from there).
 *
 * \param   kuiSize  payload size in bytes
 * \param   kpTag    label used only by the MEMORY_CHECK log
 * \param   kiAlign  alignment in bytes; used as a bit mask (kiAlign - 1)
 * \return  the aligned payload pointer, or NULL when malloc() fails or when
 *          payload plus bookkeeping does not fit the int32_t request size
 */
void* WelsMalloc (const uint32_t kuiSize, const char* kpTag, const uint32_t kiAlign) {
  const int32_t kiSizeOfVoidPointer = sizeof (void**);
  const int32_t kiSizeOfInt = sizeof (int32_t);
  const int32_t kiAlignedBytes = kiAlign - 1;
  // Sum in 64 bits: the former 32-bit sum wrapped around for sizes close to
  // UINT32_MAX, which yielded a tiny allocation followed by out-of-bounds
  // writes of the header and payload.
  const int64_t kiTrialRequestedSize = (int64_t) kuiSize + kiAlignedBytes + kiSizeOfVoidPointer + kiSizeOfInt;
  if (kiTrialRequestedSize <= 0 || kiTrialRequestedSize > 0x7fffffff)
    return NULL;
  const int32_t kiActualRequestedSize = (int32_t) kiTrialRequestedSize;
  const uint32_t kiPayloadSize = kuiSize;
  uint8_t* pBuf = (uint8_t*) malloc (kiActualRequestedSize);
  if (NULL == pBuf)
    return NULL;
#ifdef MEMORY_CHECK
  if (fpMemChkPoint == NULL) {
    fpMemChkPoint = fopen ("./enc_mem_check_point.txt", "at+");
    nCountRequestNum = 0;
  }
  if (fpMemChkPoint != NULL) {
    if (kpTag != NULL)
      fprintf (fpMemChkPoint, "WelsMalloc(), 0x%x : actual uiSize:\t%d\tbytes, input uiSize: %d bytes, %d - %s\n",
               (void*)pBuf, kiActualRequestedSize, kuiSize, nCountRequestNum++, kpTag);
    else
      fprintf (fpMemChkPoint, "WelsMalloc(), 0x%x : actual uiSize:\t%d\tbytes, input uiSize: %d bytes, %d \n", (void*)pBuf,
               kiActualRequestedSize, kuiSize, nCountRequestNum++);
    fflush (fpMemChkPoint);
  }
#endif
  uint8_t* pAlignedBuffer;
  // skip the worst-case slack plus both header fields, then round down to the alignment
  pAlignedBuffer = pBuf + kiAlignedBytes + kiSizeOfVoidPointer + kiSizeOfInt;
  pAlignedBuffer -= ((uintptr_t) pAlignedBuffer & kiAlignedBytes);
  // header: raw block pointer right below the payload, payload size below that
  * ((void**) (pAlignedBuffer - kiSizeOfVoidPointer)) = pBuf;
  * ((int32_t*) (pAlignedBuffer - (kiSizeOfVoidPointer + kiSizeOfInt))) = kiPayloadSize;
  return pAlignedBuffer;
}
/*!
 * \brief Release a buffer whose raw allocation pointer is stored in the
 *        pointer-sized slot directly below pPointer (the layout WelsMalloc() writes).
 *
 * \param pPointer payload pointer; NULL is ignored
 * \param kpTag    optional label, only used by the MEMORY_CHECK trace
 */
void WelsFree (void* pPointer, const char* kpTag) {
  if (NULL == pPointer)
    return;
  // Slot holding the address originally returned by malloc().
  void** ppRawSlot = ((void**) pPointer) - 1;
#ifdef MEMORY_CHECK
  if (fpMemChkPoint != NULL) {
    if (kpTag != NULL)
      fprintf (fpMemChkPoint, "WelsFree(), 0x%x - %s: \t%d\t bytes \n", (void*) (*ppRawSlot), kpTag,
               g_iMemoryLength);
    else
      fprintf (fpMemChkPoint, "WelsFree(), 0x%x \n", (void*) (*ppRawSlot));
    fflush (fpMemChkPoint);
  }
#endif
  free (*ppRawSlot);
}
/*!
 * \brief Allocate through CMemoryAlign::WelsMalloc() and clear the payload to zero.
 *
 * \param kuiSize payload size in bytes
 * \param kpTag   label forwarded to WelsMalloc()
 * \return zero-filled buffer, or NULL if the allocation failed
 */
void* CMemoryAlign::WelsMallocz (const uint32_t kuiSize, const char* kpTag) {
  void* pZeroed = WelsMalloc (kuiSize, kpTag);
  if (pZeroed != NULL) {
    memset (pZeroed, 0, kuiSize);
  }
  return pZeroed;
}
/*!
 * \brief Allocate a buffer aligned to this object's cache line size.
 *
 * With MEMORY_MONITOR defined, the footprint of the allocation (payload plus
 * alignment slack plus header) is added to the usage counter.
 *
 * \param kuiSize payload size in bytes
 * \param kpTag   label forwarded to WelsCommon::WelsMalloc()
 * \return payload pointer, or NULL on failure
 */
void* CMemoryAlign::WelsMalloc (const uint32_t kuiSize, const char* kpTag) {
  void* pAllocated = WelsCommon::WelsMalloc (kuiSize, kpTag, m_nCacheLineSize);
#ifdef MEMORY_MONITOR
  if (NULL != pAllocated) {
    // Payload size as stored by WelsCommon::WelsMalloc(), just below the raw-pointer slot.
    int32_t* pPayloadSize = (int32_t*) ((uint8_t*)pAllocated - sizeof (void**) - sizeof (int32_t));
    const int32_t kiFootprint = *pPayloadSize + m_nCacheLineSize - 1 + sizeof (void**) + sizeof (int32_t);
    m_nMemoryUsageInBytes += kiFootprint;
#ifdef MEMORY_CHECK
    g_iMemoryLength = kiFootprint;
#endif
  }
#endif//MEMORY_MONITOR
  return pAllocated;
}
/*!
 * \brief Release a buffer obtained from this allocator.
 *
 * With MEMORY_MONITOR defined, the footprint recomputed from the stored payload
 * size is subtracted from the usage counter before the buffer is freed.
 *
 * \param pPointer payload pointer; NULL skips the accounting and is forwarded as is
 * \param kpTag    label forwarded to WelsCommon::WelsFree()
 */
void CMemoryAlign::WelsFree (void* pPointer, const char* kpTag) {
#ifdef MEMORY_MONITOR
  if (NULL != pPointer) {
    // Payload size stored just below the raw-pointer slot at allocation time.
    int32_t* pPayloadSize = (int32_t*) ((uint8_t*)pPointer - sizeof (void**) - sizeof (int32_t));
    const int32_t kiFootprint = *pPayloadSize + m_nCacheLineSize - 1 + sizeof (void**) + sizeof (int32_t);
    m_nMemoryUsageInBytes -= kiFootprint;
#ifdef MEMORY_CHECK
    g_iMemoryLength = kiFootprint;
#endif
  }
#endif//MEMORY_MONITOR
  WelsCommon::WelsFree (pPointer, kpTag);
}
/*!
 * \brief Allocate a 16-byte aligned buffer via WelsMalloc() and fill it with zeros.
 *
 * \param kuiSize payload size in bytes
 * \param kpTag   label forwarded to WelsMalloc()
 * \return zero-filled buffer, or NULL if the allocation failed
 */
void* WelsMallocz (const uint32_t kuiSize, const char* kpTag) {
  void* pZeroed = WelsMalloc (kuiSize, kpTag, 16);
  if (pZeroed != NULL) {
    memset (pZeroed, 0, kuiSize);
  }
  return pZeroed;
}
/*!
 * \brief Return the alignment, in bytes, chosen by the constructor
 *        (the requested value, or 16 when the request was rejected).
 */
const uint32_t CMemoryAlign::WelsGetCacheLineSize() const {
return m_nCacheLineSize;
}
/*!
 * \brief Return the current value of the usage counter.
 *
 * \note In this file the counter is initialised and updated only inside
 *       MEMORY_MONITOR sections, while this getter is compiled unconditionally.
 *       NOTE(review): confirm the header always defines MEMORY_MONITOR (or the member).
 */
const uint32_t CMemoryAlign::WelsGetMemoryUsage() const {
return m_nMemoryUsageInBytes;
}
} // end of namespace WelsCommon

View File

@ -0,0 +1,165 @@
/*!
* \copy
* Copyright (c) 2009-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file sample.c
*
* \brief compute SAD and SATD
*
* \date 2009.06.02 Created
*
*************************************************************************************
*/
#include "sad_common.h"
#include "macros.h"
/*!
 * \brief Sum of absolute differences between two 4x4 sample blocks.
 *
 * \param pSample1 top-left sample of the first block
 * \param iStride1 distance in samples between rows of the first block
 * \param pSample2 top-left sample of the second block
 * \param iStride2 distance in samples between rows of the second block
 * \return accumulated |a - b| over the 16 sample pairs
 */
int32_t WelsSampleSad4x4_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) {
  uint8_t* pRowA = pSample1;
  uint8_t* pRowB = pSample2;
  int32_t iTotal = 0;
  for (int32_t iRow = 0; iRow < 4; ++iRow) {
    for (int32_t iCol = 0; iCol < 4; ++iCol) {
      iTotal += WELS_ABS ((pRowA[iCol] - pRowB[iCol]));
    }
    pRowA += iStride1;
    pRowB += iStride2;
  }
  return iTotal;
}
/*!
 * \brief SAD of an 8-wide, 4-high block, computed as two side-by-side 4x4 SADs.
 */
int32_t WelsSampleSad8x4_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) {
  const int32_t kiLeft = WelsSampleSad4x4_c (pSample1, iStride1, pSample2, iStride2);
  const int32_t kiRight = WelsSampleSad4x4_c (pSample1 + 4, iStride1, pSample2 + 4, iStride2);
  return kiLeft + kiRight;
}
/*!
 * \brief SAD of a 4-wide, 8-high block, computed as two stacked 4x4 SADs.
 */
int32_t WelsSampleSad4x8_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) {
  // The lower half starts four rows further down in each picture.
  uint8_t* pLower1 = pSample1 + (iStride1 << 2);
  uint8_t* pLower2 = pSample2 + (iStride2 << 2);
  const int32_t kiUpper = WelsSampleSad4x4_c (pSample1, iStride1, pSample2, iStride2);
  const int32_t kiLower = WelsSampleSad4x4_c (pLower1, iStride1, pLower2, iStride2);
  return kiUpper + kiLower;
}
/*!
 * \brief Sum of absolute differences between two 8x8 sample blocks.
 *
 * \param pSample1 top-left sample of the first block
 * \param iStride1 distance in samples between rows of the first block
 * \param pSample2 top-left sample of the second block
 * \param iStride2 distance in samples between rows of the second block
 * \return accumulated |a - b| over the 64 sample pairs
 */
int32_t WelsSampleSad8x8_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) {
  uint8_t* pRowA = pSample1;
  uint8_t* pRowB = pSample2;
  int32_t iTotal = 0;
  for (int32_t iRow = 0; iRow < 8; ++iRow) {
    for (int32_t iCol = 0; iCol < 8; ++iCol) {
      iTotal += WELS_ABS ((pRowA[iCol] - pRowB[iCol]));
    }
    pRowA += iStride1;
    pRowB += iStride2;
  }
  return iTotal;
}
/*!
 * \brief SAD of a 16-wide, 8-high block, computed as two side-by-side 8x8 SADs.
 */
int32_t WelsSampleSad16x8_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) {
  const int32_t kiLeft = WelsSampleSad8x8_c (pSample1, iStride1, pSample2, iStride2);
  const int32_t kiRight = WelsSampleSad8x8_c (pSample1 + 8, iStride1, pSample2 + 8, iStride2);
  return kiLeft + kiRight;
}
/*!
 * \brief SAD of an 8-wide, 16-high block, computed as two stacked 8x8 SADs.
 */
int32_t WelsSampleSad8x16_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) {
  // The lower half starts eight rows further down in each picture.
  uint8_t* pLower1 = pSample1 + (iStride1 << 3);
  uint8_t* pLower2 = pSample2 + (iStride2 << 3);
  const int32_t kiUpper = WelsSampleSad8x8_c (pSample1, iStride1, pSample2, iStride2);
  const int32_t kiLower = WelsSampleSad8x8_c (pLower1, iStride1, pLower2, iStride2);
  return kiUpper + kiLower;
}
/*!
 * \brief SAD of a 16x16 block, computed as the sum of its four 8x8 quadrants.
 */
int32_t WelsSampleSad16x16_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) {
  // Start of the lower two quadrants: eight rows down in each picture.
  uint8_t* pLower1 = pSample1 + (iStride1 << 3);
  uint8_t* pLower2 = pSample2 + (iStride2 << 3);
  const int32_t kiTopLeft = WelsSampleSad8x8_c (pSample1, iStride1, pSample2, iStride2);
  const int32_t kiTopRight = WelsSampleSad8x8_c (pSample1 + 8, iStride1, pSample2 + 8, iStride2);
  const int32_t kiBottomLeft = WelsSampleSad8x8_c (pLower1, iStride1, pLower2, iStride2);
  const int32_t kiBottomRight = WelsSampleSad8x8_c (pLower1 + 8, iStride1, pLower2 + 8, iStride2);
  return kiTopLeft + kiTopRight + kiBottomLeft + kiBottomRight;
}
/*!
 * \brief 16x16 SAD of iSample1 against the four one-sample neighbours of iSample2.
 *
 * \param pSad receives four results: [0] one row up, [1] one row down,
 *             [2] one sample left, [3] one sample right
 */
void WelsSampleSadFour16x16_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2,
                               int32_t* pSad) {
  uint8_t* pAbove = iSample2 - iStride2;
  uint8_t* pBelow = iSample2 + iStride2;
  uint8_t* pLeft = iSample2 - 1;
  uint8_t* pRight = iSample2 + 1;
  pSad[0] = WelsSampleSad16x16_c (iSample1, iStride1, pAbove, iStride2);
  pSad[1] = WelsSampleSad16x16_c (iSample1, iStride1, pBelow, iStride2);
  pSad[2] = WelsSampleSad16x16_c (iSample1, iStride1, pLeft, iStride2);
  pSad[3] = WelsSampleSad16x16_c (iSample1, iStride1, pRight, iStride2);
}
/*!
 * \brief 16x8 SAD of iSample1 against the four one-sample neighbours of iSample2.
 *
 * \param pSad receives four results: [0] one row up, [1] one row down,
 *             [2] one sample left, [3] one sample right
 */
void WelsSampleSadFour16x8_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad) {
  uint8_t* pAbove = iSample2 - iStride2;
  uint8_t* pBelow = iSample2 + iStride2;
  uint8_t* pLeft = iSample2 - 1;
  uint8_t* pRight = iSample2 + 1;
  pSad[0] = WelsSampleSad16x8_c (iSample1, iStride1, pAbove, iStride2);
  pSad[1] = WelsSampleSad16x8_c (iSample1, iStride1, pBelow, iStride2);
  pSad[2] = WelsSampleSad16x8_c (iSample1, iStride1, pLeft, iStride2);
  pSad[3] = WelsSampleSad16x8_c (iSample1, iStride1, pRight, iStride2);
}
/*!
 * \brief 8x16 SAD of iSample1 against the four one-sample neighbours of iSample2.
 *
 * \param pSad receives four results: [0] one row up, [1] one row down,
 *             [2] one sample left, [3] one sample right
 */
void WelsSampleSadFour8x16_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad) {
  uint8_t* pAbove = iSample2 - iStride2;
  uint8_t* pBelow = iSample2 + iStride2;
  uint8_t* pLeft = iSample2 - 1;
  uint8_t* pRight = iSample2 + 1;
  pSad[0] = WelsSampleSad8x16_c (iSample1, iStride1, pAbove, iStride2);
  pSad[1] = WelsSampleSad8x16_c (iSample1, iStride1, pBelow, iStride2);
  pSad[2] = WelsSampleSad8x16_c (iSample1, iStride1, pLeft, iStride2);
  pSad[3] = WelsSampleSad8x16_c (iSample1, iStride1, pRight, iStride2);
}
/*!
 * \brief 8x8 SAD of iSample1 against the four one-sample neighbours of iSample2.
 *
 * \param pSad receives four results: [0] one row up, [1] one row down,
 *             [2] one sample left, [3] one sample right
 */
void WelsSampleSadFour8x8_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad) {
  uint8_t* pAbove = iSample2 - iStride2;
  uint8_t* pBelow = iSample2 + iStride2;
  uint8_t* pLeft = iSample2 - 1;
  uint8_t* pRight = iSample2 + 1;
  pSad[0] = WelsSampleSad8x8_c (iSample1, iStride1, pAbove, iStride2);
  pSad[1] = WelsSampleSad8x8_c (iSample1, iStride1, pBelow, iStride2);
  pSad[2] = WelsSampleSad8x8_c (iSample1, iStride1, pLeft, iStride2);
  pSad[3] = WelsSampleSad8x8_c (iSample1, iStride1, pRight, iStride2);
}
/*!
 * \brief 4x4 SAD of iSample1 against the four one-sample neighbours of iSample2.
 *
 * \param pSad receives four results: [0] one row up, [1] one row down,
 *             [2] one sample left, [3] one sample right
 */
void WelsSampleSadFour4x4_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad) {
  uint8_t* pAbove = iSample2 - iStride2;
  uint8_t* pBelow = iSample2 + iStride2;
  uint8_t* pLeft = iSample2 - 1;
  uint8_t* pRight = iSample2 + 1;
  pSad[0] = WelsSampleSad4x4_c (iSample1, iStride1, pAbove, iStride2);
  pSad[1] = WelsSampleSad4x4_c (iSample1, iStride1, pBelow, iStride2);
  pSad[2] = WelsSampleSad4x4_c (iSample1, iStride1, pLeft, iStride2);
  pSad[3] = WelsSampleSad4x4_c (iSample1, iStride1, pRight, iStride2);
}
/*!
 * \brief 8x4 SAD of iSample1 against the four one-sample neighbours of iSample2.
 *
 * \param pSad receives four results: [0] one row up, [1] one row down,
 *             [2] one sample left, [3] one sample right
 */
void WelsSampleSadFour8x4_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad) {
  uint8_t* pAbove = iSample2 - iStride2;
  uint8_t* pBelow = iSample2 + iStride2;
  uint8_t* pLeft = iSample2 - 1;
  uint8_t* pRight = iSample2 + 1;
  pSad[0] = WelsSampleSad8x4_c (iSample1, iStride1, pAbove, iStride2);
  pSad[1] = WelsSampleSad8x4_c (iSample1, iStride1, pBelow, iStride2);
  pSad[2] = WelsSampleSad8x4_c (iSample1, iStride1, pLeft, iStride2);
  pSad[3] = WelsSampleSad8x4_c (iSample1, iStride1, pRight, iStride2);
}
/*!
 * \brief 4x8 SAD of iSample1 against the four one-sample neighbours of iSample2.
 *
 * \param pSad receives four results: [0] one row up, [1] one row down,
 *             [2] one sample left, [3] one sample right
 */
void WelsSampleSadFour4x8_c (uint8_t* iSample1, int32_t iStride1, uint8_t* iSample2, int32_t iStride2, int32_t* pSad) {
  uint8_t* pAbove = iSample2 - iStride2;
  uint8_t* pBelow = iSample2 + iStride2;
  uint8_t* pLeft = iSample2 - 1;
  uint8_t* pRight = iSample2 + 1;
  pSad[0] = WelsSampleSad4x8_c (iSample1, iStride1, pAbove, iStride2);
  pSad[1] = WelsSampleSad4x8_c (iSample1, iStride1, pBelow, iStride2);
  pSad[2] = WelsSampleSad4x8_c (iSample1, iStride1, pLeft, iStride2);
  pSad[3] = WelsSampleSad4x8_c (iSample1, iStride1, pRight, iStride2);
}

View File

@ -0,0 +1,126 @@
/*!
* \copy
* Copyright (c) 2009-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file utils.c
*
* \brief common tool/function utilization
*
* \date 03/10/2009 Created
*
*************************************************************************************
*/
#include "utils.h"
#include "crt_util_safe_x.h" // Safe CRT routines like utils for cross platforms
#include "codec_app_def.h"
// Forward declaration; the definition (with its parameter documentation) is
// further down in this file.
float WelsCalcPsnr (const void* kpTarPic,
const int32_t kiTarStride,
const void* kpRefPic,
const int32_t kiRefStride,
const int32_t kiWidth,
const int32_t kiHeight);
/*!
 * \brief Prefix a printf-style message with "[OpenH264] this = ..., <Level>:" and
 *        hand it to the callback stored in the log context.
 *
 * \param logCtx context providing pCodecInstance (printed in the prefix), the
 *               pfLog callback and its pLogCtx argument
 * \param iLevel one of the WELS_LOG_* levels; any other value is tagged "Detail"
 * \param kpFmt  printf-style format for the variadic arguments; it is appended to
 *               the prefix and the combined format is passed to the callback
 */
void WelsLog (SLogContext* logCtx, int32_t iLevel, const char* kpFmt, ...) {
  // Choose the prefix template for this severity.
  const char* kpPrefixFmt;
  switch (iLevel) {
  case WELS_LOG_ERROR:
    kpPrefixFmt = "[OpenH264] this = 0x%p, Error:";
    break;
  case WELS_LOG_WARNING:
    kpPrefixFmt = "[OpenH264] this = 0x%p, Warning:";
    break;
  case WELS_LOG_INFO:
    kpPrefixFmt = "[OpenH264] this = 0x%p, Info:";
    break;
  case WELS_LOG_DEBUG:
    kpPrefixFmt = "[OpenH264] this = 0x%p, Debug:";
    break;
  default:
    kpPrefixFmt = "[OpenH264] this = 0x%p, Detail:";
    break;
  }
  char pTraceTag[MAX_LOG_SIZE] = {0};
  WelsSnprintf (pTraceTag, MAX_LOG_SIZE, kpPrefixFmt, logCtx->pCodecInstance);
  // The caller's format string follows the prefix; the callback expands it.
  WelsStrcat (pTraceTag, MAX_LOG_SIZE, kpFmt);
  va_list vl;
  va_start (vl, kpFmt);
  logCtx->pfLog (logCtx->pLogCtx, iLevel, pTraceTag, vl);
  va_end (vl);
}
#ifndef CALC_PSNR
#define CONST_FACTOR_PSNR (10.0 / log(10.0)) // for good computation
// PSNR in dB for a w x h picture with summed squared error s (65025 = 255 * 255).
// Every argument is parenthesised so that expression arguments expand correctly.
#define CALC_PSNR(w, h, s) ((float)(CONST_FACTOR_PSNR * log( 65025.0 * (w) * (h) / (s) )))
#endif//CALC_PSNR
/*
* PSNR calculation routines
*/
/*!
*************************************************************************************
* \brief PSNR calculation utilization in Wels
*
* \param kpTarPic target picture samples (8 bits per sample)
* \param kiTarStride stride of the target picture buffer
* \param kpRefPic reference picture samples (8 bits per sample)
* \param kiRefStride stride of the reference picture buffer
* \param kiWidth picture width in pixel
* \param kiHeight picture height in pixel
*
* \return actual PSNR result; -1.0 if either picture pointer is NULL; 99.99 if
*         the pictures are identical (this includes an empty width/height)
*
* \note N/A
*************************************************************************************
*/
float WelsCalcPsnr (const void* kpTarPic,
                    const int32_t kiTarStride,
                    const void* kpRefPic,
                    const int32_t kiRefStride,
                    const int32_t kiWidth,
                    const int32_t kiHeight) {
  const uint8_t* pTar = (const uint8_t*)kpTarPic;
  const uint8_t* pRef = (const uint8_t*)kpRefPic;
  if (NULL == pTar || NULL == pRef)
    return (-1.0f);
  int64_t iSqe = 0; // sum of squared sample differences
  for (int32_t y = 0; y < kiHeight; ++ y) { // OPTable !!
    // Row starts are loop-invariant for the inner loop.
    const uint8_t* pTarRow = pTar + y * kiTarStride;
    const uint8_t* pRefRow = pRef + y * kiRefStride;
    for (int32_t x = 0; x < kiWidth; ++ x) {
      const int32_t kiT = pTarRow[x] - pRefRow[x];
      iSqe += kiT * kiT;
    }
  }
  // A zero error would make the ratio infinite; report a fixed ceiling instead.
  if (0 == iSqe) {
    return (99.99f);
  }
  return CALC_PSNR (kiWidth, kiHeight, iSqe);
}

View File

@ -0,0 +1,103 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifdef _WIN32
#include <windows.h>
#include <tchar.h>
#endif
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include "crt_util_safe_x.h" // Safe CRT routines like utils for cross platforms
#include "welsCodecTrace.h"
#include "utils.h"
// Default trace callback installed by the constructor: writes the message and a
// newline to stderr. The ctx and level arguments are not used.
static void welsStderrTrace (void* ctx, int level, const char* string) {
fprintf (stderr, "%s\n", string);
}
/*!
 * Set up default tracing: messages go to stderr at WELS_LOG_DEFAULT level, and
 * the embedded log context routes back to this object through StaticCodecTrace.
 */
welsCodecTrace::welsCodecTrace() {
  // Embedded log context: callback plus the object it should be invoked on.
  m_sLogCtx.pCodecInstance = NULL;
  m_sLogCtx.pfLog = StaticCodecTrace;
  m_sLogCtx.pLogCtx = this;
  // Output side: callback, its user context and the level threshold.
  m_pTraceCtx = NULL;
  m_fpTrace = welsStderrTrace;
  m_iTraceLevel = WELS_LOG_DEFAULT;
}
// Destructor: only clears the trace callback pointer.
welsCodecTrace::~welsCodecTrace() {
m_fpTrace = NULL;
}
void welsCodecTrace::StaticCodecTrace (void* pCtx, const int32_t iLevel, const char* Str_Format, va_list vl) {
welsCodecTrace* self = (welsCodecTrace*) pCtx;
self->CodecTrace (iLevel, Str_Format, vl);
}
/*!
 * Format a message and pass it to the trace callback.
 *
 * Messages whose level is above the configured trace level are dropped. The
 * text is formatted into a MAX_LOG_SIZE buffer before the callback (if any) is
 * invoked with the user context, the level and the formatted string.
 */
void welsCodecTrace::CodecTrace (const int32_t iLevel, const char* Str_Format, va_list vl) {
  const bool kbEnabled = (iLevel <= m_iTraceLevel);
  if (kbEnabled) {
    char pBuf[MAX_LOG_SIZE] = {0};
    WelsVsnprintf (pBuf, MAX_LOG_SIZE, Str_Format, vl); // confirmed_safe_unsafe_usage
    if (m_fpTrace != NULL) {
      m_fpTrace (m_pTraceCtx, iLevel, pBuf);
    }
  }
}
// Record the codec instance pointer in the embedded log context.
void welsCodecTrace::SetCodecInstance (void* pCodecInstance) {
m_sLogCtx.pCodecInstance = pCodecInstance;
}
/*!
 * Change the trace level threshold used by CodecTrace(). Negative values are
 * ignored and leave the current level untouched.
 */
void welsCodecTrace::SetTraceLevel (const int32_t iLevel) {
  if (iLevel < 0) {
    return;
  }
  m_iTraceLevel = iLevel;
}
// Replace the trace callback. CodecTrace() skips the call when this is NULL.
void welsCodecTrace::SetTraceCallback (WelsTraceCallback func) {
m_fpTrace = func;
}
// Set the opaque pointer passed as first argument to the trace callback.
void welsCodecTrace::SetTraceCallbackContext (void* ctx) {
m_pTraceCtx = ctx;
}

View File

@ -0,0 +1,743 @@
;*!
;* \copy
;* Copyright (c) 2009-2013, Cisco Systems
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;*
;* * Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;*
;* * Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in
;* the documentation and/or other materials provided with the
;* distribution.
;*
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;* POSSIBILITY OF SUCH DAMAGE.
;*
;*
;* sse2inc.asm
;*
;* Abstract
;* macro and constant
;*
;* History
;* 8/5/2009 Created
;*
;*
;*************************************************************************/
;***********************************************************************
; Options, for DEBUG
;***********************************************************************
; MOVDQ currently expands to the aligned move (movdqa); change the "%if 1" to
; "%if 0" to make it expand to the unaligned move (movdqu) instead.
%if 1
%define MOVDQ movdqa
%else
%define MOVDQ movdqu
%endif
; WELSEMMS currently expands to emms; change the "%if 1" to "%if 0" to make it
; expand to nothing.
%if 1
%define WELSEMMS emms
%else
%define WELSEMMS
%endif
;***********************************************************************
; Macros
;***********************************************************************
; Calling-convention abstraction. One of WIN64, UNIX64 or X86_32 selects a branch;
; each branch provides the same set of names:
;   argN / argNd          - location of the Nth function argument (register or stack slot)
;   rN / rNd / rNw / rNb  - full-width, 32-bit, 16-bit and 8-bit names of register N
;                           (r7 is the stack pointer in every branch)
;   PUSHRFLAGS/POPRFLAGS  - flag save/restore instruction for the mode
;   retrq / retrd         - return-value register
; Stack-relative argN definitions reference push_num, which is not defined in this
; part of the file; the LOAD_*_PARA macros below add one to it per register they push.
; NOTE(review): no r5w alias and no r4b..r6b aliases are defined in any branch.
%ifdef WIN64 ; Windows x64 ;************************************
DEFAULT REL
BITS 64
; Arguments 1-4 are in rcx, rdx, r8, r9; arguments 5+ are read from the stack.
; Offset 40 = 8-byte return address + 32-byte shadow space of the Windows x64 convention.
%define arg1 rcx
%define arg2 rdx
%define arg3 r8
%define arg4 r9
%define arg5 [rsp + push_num*8 + 40]
%define arg6 [rsp + push_num*8 + 48]
%define arg7 [rsp + push_num*8 + 56]
%define arg8 [rsp + push_num*8 + 64]
%define arg9 [rsp + push_num*8 + 72]
%define arg10 [rsp + push_num*8 + 80]
%define arg11 [rsp + push_num*8 + 88]
%define arg12 [rsp + push_num*8 + 96]
%define arg1d ecx
%define arg2d edx
%define arg3d r8d
%define arg4d r9d
%define arg5d arg5
%define arg6d arg6
%define arg7d arg7
%define arg8d arg8
%define arg9d arg9
%define arg10d arg10
%define arg11d arg11
%define arg12d arg12
; r0..r3 coincide with the registers of arg1..arg4.
%define r0 rcx
%define r1 rdx
%define r2 r8
%define r3 r9
%define r4 rax
%define r5 r10
%define r6 r11
%define r7 rsp
%define r0d ecx
%define r1d edx
%define r2d r8d
%define r3d r9d
%define r4d eax
%define r5d r10d
%define r6d r11d
%define r0w cx
%define r1w dx
%define r2w r8w
%define r3w r9w
%define r4w ax
%define r6w r11w
%define r0b cl
%define r1b dl
%define r2b r8l
%define r3b r9l
%define PUSHRFLAGS pushfq
%define POPRFLAGS popfq
%define retrq rax
%define retrd eax
%elifdef UNIX64 ; Unix x64 ;************************************
DEFAULT REL
BITS 64
%ifidn __OUTPUT_FORMAT__,elf64
SECTION .note.GNU-stack noalloc noexec nowrite progbits ; Mark the stack as non-executable
%endif
; Arguments 1-6 are in rdi, rsi, rdx, rcx, r8, r9; arguments 7+ are read from the
; stack, starting right above the 8-byte return address.
%define arg1 rdi
%define arg2 rsi
%define arg3 rdx
%define arg4 rcx
%define arg5 r8
%define arg6 r9
%define arg7 [rsp + push_num*8 + 8]
%define arg8 [rsp + push_num*8 + 16]
%define arg9 [rsp + push_num*8 + 24]
%define arg10 [rsp + push_num*8 + 32]
%define arg11 [rsp + push_num*8 + 40]
%define arg12 [rsp + push_num*8 + 48]
%define arg1d edi
%define arg2d esi
%define arg3d edx
%define arg4d ecx
%define arg5d r8d
%define arg6d r9d
%define arg7d arg7
%define arg8d arg8
%define arg9d arg9
%define arg10d arg10
%define arg11d arg11
%define arg12d arg12
; r0..r5 coincide with the registers of arg1..arg6.
%define r0 rdi
%define r1 rsi
%define r2 rdx
%define r3 rcx
%define r4 r8
%define r5 r9
%define r6 r10
%define r7 rsp
%define r0d edi
%define r1d esi
%define r2d edx
%define r3d ecx
%define r4d r8d
%define r5d r9d
%define r6d r10d
%define r0w di
%define r1w si
%define r2w dx
%define r3w cx
%define r4w r8w
%define r6w r10w
%define r0b dil
%define r1b sil
%define r2b dl
%define r3b cl
%define PUSHRFLAGS pushfq
%define POPRFLAGS popfq
%define retrq rax
%define retrd eax
%elifdef X86_32 ; X86_32 ;************************************
BITS 32
%ifidn __OUTPUT_FORMAT__,elf
SECTION .note.GNU-stack noalloc noexec nowrite progbits ; Mark the stack as non-executable
%endif
; Every argument is read from the stack, starting right above the 4-byte return address.
%define arg1 [esp + push_num*4 + 4]
%define arg2 [esp + push_num*4 + 8]
%define arg3 [esp + push_num*4 + 12]
%define arg4 [esp + push_num*4 + 16]
%define arg5 [esp + push_num*4 + 20]
%define arg6 [esp + push_num*4 + 24]
%define arg7 [esp + push_num*4 + 28]
%define arg8 [esp + push_num*4 + 32]
%define arg9 [esp + push_num*4 + 36]
%define arg10 [esp + push_num*4 + 40]
%define arg11 [esp + push_num*4 + 44]
%define arg12 [esp + push_num*4 + 48]
%define arg1d arg1
%define arg2d arg2
%define arg3d arg3
%define arg4d arg4
%define arg5d arg5
%define arg6d arg6
%define arg7d arg7
%define arg8d arg8
%define arg9d arg9
%define arg10d arg10
%define arg11d arg11
%define arg12d arg12
; rN do not hold arguments here; the LOAD_*_PARA macros fill them from the stack.
%define r0 eax
%define r1 ecx
%define r2 edx
%define r3 ebx
%define r4 esi
%define r5 edi
%define r6 ebp
%define r7 esp
%define r0d eax
%define r1d ecx
%define r2d edx
%define r3d ebx
%define r4d esi
%define r5d edi
%define r6d ebp
%define r0w ax
%define r1w cx
%define r2w dx
%define r3w bx
%define r4w si
%define r6w bp
%define r0b al
%define r1b cl
%define r2b dl
%define r3b bl
%define PUSHRFLAGS pushfd
%define POPRFLAGS popfd
%define retrq eax ; 32-bit mode does not support 64-bit registers
%define retrd eax
%endif
; LOAD_PARA dst, src : plain move of src into dst.
%macro LOAD_PARA 2
mov %1, %2
%endmacro
; On X86_32, load the first stack argument into r0. Expands to nothing on the
; 64-bit targets, where r0 is already the register of the first argument.
%macro LOAD_1_PARA 0
%ifdef X86_32
mov r0, [esp + push_num*4 + 4]
%endif
%endmacro
; On X86_32, load the first two stack arguments into r0 and r1. Expands to
; nothing on the 64-bit targets, where r0/r1 already hold them.
%macro LOAD_2_PARA 0
%ifdef X86_32
mov r0, [esp + push_num*4 + 4]
mov r1, [esp + push_num*4 + 8]
%endif
%endmacro
; On X86_32, load the first three stack arguments into r0..r2. Expands to
; nothing on the 64-bit targets, where r0..r2 already hold them.
%macro LOAD_3_PARA 0
%ifdef X86_32
mov r0, [esp + push_num*4 + 4]
mov r1, [esp + push_num*4 + 8]
mov r2, [esp + push_num*4 + 12]
%endif
%endmacro
; On X86_32, save r3 (ebx) on the stack, account for the push in push_num, then
; load the first four stack arguments into r0..r3. LOAD_4_PARA_POP restores r3.
; Expands to nothing on the 64-bit targets.
%macro LOAD_4_PARA 0
%ifdef X86_32
push r3
%assign push_num push_num+1
mov r0, [esp + push_num*4 + 4]
mov r1, [esp + push_num*4 + 8]
mov r2, [esp + push_num*4 + 12]
mov r3, [esp + push_num*4 + 16]
%endif
%endmacro
; Make the first five arguments available in r0..r4.
; X86_32: saves r3 and r4, bumps push_num by 2, loads all five from the stack
;         (undo with LOAD_5_PARA_POP).
; WIN64:  only argument 5 lives on the stack; it is loaded into r4.
; UNIX64: nothing to do, r0..r4 already are the first five argument registers.
%macro LOAD_5_PARA 0
%ifdef X86_32
push r3
push r4
%assign push_num push_num+2
mov r0, [esp + push_num*4 + 4]
mov r1, [esp + push_num*4 + 8]
mov r2, [esp + push_num*4 + 12]
mov r3, [esp + push_num*4 + 16]
mov r4, [esp + push_num*4 + 20]
%elifdef WIN64
mov r4, [rsp + push_num*8 + 40]
%endif
%endmacro
; Make the first six arguments available in r0..r5.
; X86_32: saves r3..r5, bumps push_num by 3, loads all six from the stack
;         (undo with LOAD_6_PARA_POP).
; WIN64:  arguments 5 and 6 are loaded from the stack into r4 and r5.
; UNIX64: nothing to do, r0..r5 already are the first six argument registers.
%macro LOAD_6_PARA 0
%ifdef X86_32
push r3
push r4
push r5
%assign push_num push_num+3
mov r0, [esp + push_num*4 + 4]
mov r1, [esp + push_num*4 + 8]
mov r2, [esp + push_num*4 + 12]
mov r3, [esp + push_num*4 + 16]
mov r4, [esp + push_num*4 + 20]
mov r5, [esp + push_num*4 + 24]
%elifdef WIN64
mov r4, [rsp + push_num*8 + 40]
mov r5, [rsp + push_num*8 + 48]
%endif
%endmacro
; Make the first seven arguments available in r0..r6.
; X86_32: saves r3..r6, bumps push_num by 4, loads all seven from the stack
;         (undo with LOAD_7_PARA_POP).
; WIN64:  arguments 5..7 are loaded from the stack into r4..r6.
; UNIX64: only argument 7 lives on the stack; it is loaded into r6.
%macro LOAD_7_PARA 0
%ifdef X86_32
push r3
push r4
push r5
push r6
%assign push_num push_num+4
mov r0, [esp + push_num*4 + 4]
mov r1, [esp + push_num*4 + 8]
mov r2, [esp + push_num*4 + 12]
mov r3, [esp + push_num*4 + 16]
mov r4, [esp + push_num*4 + 20]
mov r5, [esp + push_num*4 + 24]
mov r6, [esp + push_num*4 + 28]
%elifdef WIN64
mov r4, [rsp + push_num*8 + 40]
mov r5, [rsp + push_num*8 + 48]
mov r6, [rsp + push_num*8 + 56]
%elifdef UNIX64
mov r6, [rsp + push_num*8 + 8]
%endif
%endmacro
; Counterpart of LOAD_4_PARA: on X86_32 restores r3. Does not change push_num.
%macro LOAD_4_PARA_POP 0
%ifdef X86_32
pop r3
%endif
%endmacro
; Counterpart of LOAD_5_PARA: on X86_32 restores r4 and r3 (reverse push order).
; Does not change push_num.
%macro LOAD_5_PARA_POP 0
%ifdef X86_32
pop r4
pop r3
%endif
%endmacro
; Counterpart of LOAD_6_PARA: on X86_32 restores r5, r4 and r3 (reverse push
; order). Does not change push_num.
%macro LOAD_6_PARA_POP 0
%ifdef X86_32
pop r5
pop r4
pop r3
%endif
%endmacro
; Counterpart of LOAD_7_PARA: on X86_32 restores r6, r5, r4 and r3 (reverse push
; order). Does not change push_num.
%macro LOAD_7_PARA_POP 0
%ifdef X86_32
pop r6
pop r5
pop r4
pop r3
%endif
%endmacro
; PUSH_XMM n : n = number of XMM registers the function uses (xmm0..xmm(n-1)).
; WIN64 only; expands to nothing elsewhere. When n > 6 it reserves 16*(n-6)
; bytes of stack and stores xmm6..xmm(n-1) there with unaligned moves.
; n is remembered in xmm_num_regs for POP_XMM. If push_num is defined it is
; advanced by 2 per saved register (16 bytes = two 8-byte slots) so that the
; stack-relative argN definitions keep pointing at the arguments.
%macro PUSH_XMM 1
%ifdef WIN64
%assign xmm_num_regs %1
%if xmm_num_regs > 6
%ifdef push_num
%assign push_num push_num+2*(%1-6)
%endif
sub rsp, 16*(%1 - 6)
movdqu [rsp], xmm6
%endif
%if xmm_num_regs > 7
movdqu [rsp+16], xmm7
%endif
%if xmm_num_regs > 8
movdqu [rsp+32], xmm8
%endif
%if xmm_num_regs > 9
movdqu [rsp+48], xmm9
%endif
%if xmm_num_regs > 10
movdqu [rsp+64], xmm10
%endif
%if xmm_num_regs > 11
movdqu [rsp+80], xmm11
%endif
%if xmm_num_regs > 12
movdqu [rsp+96], xmm12
%endif
%if xmm_num_regs > 13
movdqu [rsp+112], xmm13
%endif
%if xmm_num_regs > 14
movdqu [rsp+128], xmm14
%endif
%if xmm_num_regs > 15
movdqu [rsp+144], xmm15
%endif
%endif
%endmacro
; Counterpart of PUSH_XMM. WIN64 only; expands to nothing elsewhere. Reloads the
; registers saved by the last PUSH_XMM (count taken from xmm_num_regs) and
; releases the stack area. rsp must have the same value it had right after
; PUSH_XMM. Note that push_num is not adjusted back here.
%macro POP_XMM 0
%ifdef WIN64
%if xmm_num_regs > 15
movdqu xmm15, [rsp+144]
%endif
%if xmm_num_regs > 14
movdqu xmm14, [rsp+128]
%endif
%if xmm_num_regs > 13
movdqu xmm13, [rsp+112]
%endif
%if xmm_num_regs > 12
movdqu xmm12, [rsp+96]
%endif
%if xmm_num_regs > 11
movdqu xmm11, [rsp+80]
%endif
%if xmm_num_regs > 10
movdqu xmm10, [rsp+64]
%endif
%if xmm_num_regs > 9
movdqu xmm9, [rsp+48]
%endif
%if xmm_num_regs > 8
movdqu xmm8, [rsp+32]
%endif
%if xmm_num_regs > 7
movdqu xmm7, [rsp+16]
%endif
%if xmm_num_regs > 6
movdqu xmm6, [rsp]
add rsp, 16*(xmm_num_regs - 6)
%endif
%endif
%endmacro
; SIGN_EXTENSION dst64, src32 : on the 64-bit targets, sign-extend a 32-bit value
; into a 64-bit register (movsxd). Expands to nothing on X86_32.
%macro SIGN_EXTENSION 2
%ifndef X86_32
movsxd %1, %2
%endif
%endmacro
; SIGN_EXTENSIONW dst, src : on the 64-bit targets, sign-extend src into dst with
; movsx. Expands to nothing on X86_32.
%macro SIGN_EXTENSIONW 2
%ifndef X86_32
movsx %1, %2
%endif
%endmacro
; ZERO_EXTENSION reg32 : on the 64-bit targets, move a 32-bit register onto
; itself; in 64-bit mode a 32-bit register write clears the upper 32 bits of the
; full register. Expands to nothing on X86_32.
%macro ZERO_EXTENSION 1
%ifndef X86_32
mov dword %1, %1
%endif
%endmacro
; WELS_EXTERN name : start an exported function.
; Aligns to 16 bytes (padding with nop), declares the symbol global and emits
; its label. With PREFIX defined the symbol gets a leading underscore and `name`
; is redefined to the prefixed form. With WELS_PRIVATE_EXTERN defined its value
; is appended to the global directive as a symbol qualifier.
%macro WELS_EXTERN 1
ALIGN 16, nop
%ifdef PREFIX
%ifdef WELS_PRIVATE_EXTERN
global _%1: WELS_PRIVATE_EXTERN
%else
global _%1
%endif
%define %1 _%1
%else
%ifdef WELS_PRIVATE_EXTERN
global %1: WELS_PRIVATE_EXTERN
%else
global %1
%endif
%endif
%1:
%endmacro
; WELS_AbsW val, tmp : per-word absolute value of val, computed as
; max(val, 0 - val) on signed words. tmp is overwritten with 0 - val.
%macro WELS_AbsW 2
pxor %2, %2
psubw %2, %1
pmaxsw %1, %2
%endmacro
; MMX_XSwap suffix, a, b, tmp : interleave a with b at the granularity given by
; the punpck suffix (e.g. wd, dq).
;   out: a   = punpckl<suffix>(a, b)  (interleaved low halves)
;        tmp = punpckh<suffix>(a, b)  (interleaved high halves)
;   b is left unchanged.
%macro MMX_XSwap 4
movq %4, %2
punpckh%1 %4, %3
punpckl%1 %2, %3
%endmacro
; MMX_Trans4x4W r1, r2, r3, r4, tmp : 4x4 transpose of words built from four
; MMX_XSwap interleaves. The results end up in %1, %4, %5 and %3 (in that order);
; %2 is left holding an intermediate value.
; pOut mm1, mm4, mm5, mm3
%macro MMX_Trans4x4W 5
MMX_XSwap wd, %1, %2, %5
MMX_XSwap wd, %3, %4, %2
MMX_XSwap dq, %1, %3, %4
MMX_XSwap dq, %5, %2, %3
%endmacro
;for TRANSPOSE
; SSE2_XSawp suffix, a, b, tmp : XMM version of MMX_XSwap.
;   out: a   = punpckl<suffix>(a, b)  (interleaved low halves)
;        tmp = punpckh<suffix>(a, b)  (interleaved high halves)
;   b is left unchanged.
%macro SSE2_XSawp 4
movdqa %4, %2
punpckl%1 %2, %3
punpckh%1 %4, %3
%endmacro
; SSE2_Trans4x4D r1, r2, r3, r4, tmp : 4x4 transpose of dwords. The results end
; up in %1, %4, %5 and %3 (in that order); %2 is left holding an intermediate value.
; in: xmm1, xmm2, xmm3, xmm4 pOut: xmm1, xmm4, xmm5, xmm3
%macro SSE2_Trans4x4D 5
SSE2_XSawp dq, %1, %2, %5
SSE2_XSawp dq, %3, %4, %2
SSE2_XSawp qdq, %1, %3, %4
SSE2_XSawp qdq, %5, %2, %3
%endmacro
; SSE2_TransTwo4x4W r1, r2, r3, r4, tmp : word transpose built from six
; SSE2_XSawp interleaves. The results end up in %1, %2, %4 and %5; %3 is left
; holding an intermediate value.
;in: xmm0, xmm1, xmm2, xmm3 pOut: xmm0, xmm1, xmm3, xmm4
%macro SSE2_TransTwo4x4W 5
SSE2_XSawp wd, %1, %2, %5
SSE2_XSawp wd, %3, %4, %2
SSE2_XSawp dq, %1, %3, %4
SSE2_XSawp dq, %5, %2, %3
SSE2_XSawp qdq, %1, %5, %2
SSE2_XSawp qdq, %4, %3, %5
%endmacro
; SSE2_TransTwo8x8B r1..r8, mem : byte transpose over eight XMM registers, done
; as four interleave passes (bw, wd, dq, qdq). Only eight registers are
; available for eight inputs plus one temporary, so %9 is used as a spill slot
; between passes; it is both stored to and loaded from with movdqa.
; NOTE(review): %9 is presumably a 16-byte aligned memory operand - confirm at call sites.
;in: m1, m2, m3, m4, m5, m6, m7, m8
;pOut: m5, m3, m4, m8, m6, m2, m7, m1
%macro SSE2_TransTwo8x8B 9
movdqa %9, %8
SSE2_XSawp bw, %1, %2, %8
SSE2_XSawp bw, %3, %4, %2
SSE2_XSawp bw, %5, %6, %4
movdqa %6, %9
movdqa %9, %4
SSE2_XSawp bw, %7, %6, %4
SSE2_XSawp wd, %1, %3, %6
SSE2_XSawp wd, %8, %2, %3
SSE2_XSawp wd, %5, %7, %2
movdqa %7, %9
movdqa %9, %3
SSE2_XSawp wd, %7, %4, %3
SSE2_XSawp dq, %1, %5, %4
SSE2_XSawp dq, %6, %2, %5
SSE2_XSawp dq, %8, %7, %2
movdqa %7, %9
movdqa %9, %5
SSE2_XSawp dq, %7, %3, %5
SSE2_XSawp qdq, %1, %8, %3
SSE2_XSawp qdq, %4, %2, %8
SSE2_XSawp qdq, %6, %7, %2
movdqa %7, %9
movdqa %9, %1
SSE2_XSawp qdq, %7, %5, %1
movdqa %5, %9
%endmacro
; SSE2_LoadDiff8P dst, tmp, zero, [pix1], [pix2] :
; loads 8 bytes from each source, widens them to words by interleaving with
; `zero` (which must contain all zeros), and leaves pix1 - pix2 as eight signed
; words in dst. tmp is overwritten with the widened pix2 values.
;xmm0, xmm6, xmm7, [eax], [ecx]
;xmm7 = 0, eax = pix1, ecx = pix2, xmm0 save the result
%macro SSE2_LoadDiff8P 5
movq %1, %4
punpcklbw %1, %3
movq %2, %5
punpcklbw %2, %3
psubw %1, %2
%endmacro
; SSE2_SumSub m1, m2, tmp : per-word sum and difference.
; tmp is overwritten with the original value of m2.
; m2 = m1 + m2, m1 = m1 - m2
%macro SSE2_SumSub 3
movdqa %3, %2
paddw %2, %1
psubw %1, %3
%endmacro
; Broadcast one byte to all 16 bytes of an XMM register.
; %3 is a register letter (a/b/c/d): the byte is taken from %3l (e.g. al), copied
; into %3h so the low word holds it twice, and that word is then replicated.
; The high byte register %3h is overwritten.
%macro butterfly_1to16_sse 3 ; xmm? for dst, xmm? for tmp, one byte for pSrc [generic register name: a/b/c/d]
mov %3h, %3l
movd %1, e%3x ; i.e, %1 = eax (=b0)
pshuflw %2, %1, 00h ; ..., b0 b0 b0 b0 b0 b0 b0 b0
pshufd %1, %2, 00h ; b0 b0 b0 b0, b0 b0 b0 b0, b0 b0 b0 b0, b0 b0 b0 b0
%endmacro
; SSE2_Copy8Times dst, src32 : replicate the low 16-bit word of src32 into all
; eight words of dst.
;copy a dw into a xmm for 8 times
%macro SSE2_Copy8Times 2
movd %1, %2
punpcklwd %1, %1
pshufd %1, %1, 0
%endmacro
; SSE2_Copy16Times dst, src32 : replicate a byte value into all 16 bytes of dst.
; The low word of src32 is copied to all eight words, which are then packed to
; bytes with unsigned saturation, so the low word must be in 0..255 for the byte
; to be reproduced unchanged.
;copy a db into a xmm for 16 times
%macro SSE2_Copy16Times 2
movd %1, %2
pshuflw %1, %1, 0
punpcklqdq %1, %1
packuswb %1, %1
%endmacro
;***********************************************************************
;preprocessor constants
;***********************************************************************
; Build the constant without a memory load: all-ones, shifted right by 15 to get
; 1 in every word, then shifted left by 5 to get 32.
;dw 32,32,32,32,32,32,32,32 for xmm
;dw 32,32,32,32 for mm
%macro WELS_DW32 1
pcmpeqw %1,%1
psrlw %1,15
psllw %1,5
%endmacro
; Build the constant without a memory load: all-ones, shifted right by 15 to
; leave 1 in every word.
;dw 1, 1, 1, 1, 1, 1, 1, 1 for xmm
;dw 1, 1, 1, 1 for mm
%macro WELS_DW1 1
pcmpeqw %1,%1
psrlw %1,15
%endmacro
; Clear the register by XORing it with itself.
;all 0 for xmm and mm
%macro WELS_Zero 1
pxor %1, %1
%endmacro
; Build the constant without a memory load: all-ones, then each dword shifted
; right by 31 to leave 1.
;dd 1, 1, 1, 1 for xmm
;dd 1, 1 for mm
%macro WELS_DD1 1
pcmpeqw %1,%1
psrld %1,31
%endmacro
; Build the constant without a memory load: 1 in every word (as in WELS_DW1),
; then packed to bytes.
;dB 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
%macro WELS_DB1 1
pcmpeqw %1,%1
psrlw %1,15
packuswb %1,%1
%endmacro
; VEX-encoded (three-operand) form of WELS_DW1: 1 in every word of %1.
%macro WELS_DW1_VEX 1
vpcmpeqw %1, %1, %1
vpsrlw %1, %1, 15
%endmacro
; VEX-encoded (three-operand) form of WELS_DW32: 32 in every word of %1.
%macro WELS_DW32_VEX 1
vpcmpeqw %1, %1, %1
vpsrlw %1, %1, 15
vpsllw %1, %1, 5
%endmacro
; 32767 (0x7FFF) in every word of %1: all-ones shifted right by one bit.
%macro WELS_DW32767_VEX 1
vpcmpeqw %1, %1, %1
vpsrlw %1, %1, 1
%endmacro
;***********************************************************************
; Utility macros for X86_32 PIC support
;***********************************************************************
; Used internally by other macros.
; INIT_X86_32_PIC_ reg, preserve
;   %1 = register that will hold the reference address
;   %2 = non-zero to push the register's old value first (counted in push_num)
; With X86_32_PICASM defined: a call/ret pair reads the return address, which is
; the address of the local label %%pic_refpoint, into the register. pic(addr)
; then rewrites addr as register + (addr - %%pic_refpoint).
; Without X86_32_PICASM no code is emitted and pic(addr) is just addr.
%macro INIT_X86_32_PIC_ 2
%ifdef X86_32_PICASM
%xdefine pic_ptr %1
%xdefine pic_ptr_preserve %2
%if pic_ptr_preserve
%assign push_num push_num+1
push pic_ptr
%endif
call %%get_pc
%%pic_refpoint:
jmp %%pic_init_done
%%get_pc:
mov pic_ptr, [esp]
ret
%%pic_init_done:
%define pic(data_addr) (pic_ptr+(data_addr)-%%pic_refpoint)
%else
%define pic(data_addr) (data_addr)
%endif
%endmacro
; Get program counter and define a helper macro "pic(addr)" to convert absolute
; addresses to program counter-relative addresses if X86_32_PICASM is defined.
; Otherwise define "pic(addr)" as an identity function.
; %1=register to store PC/EIP in.
; The previous value of the register is pushed first; DEINIT_X86_32_PIC (or
; DEINIT_X86_32_PIC_KEEPDEF) pops it again.
%macro INIT_X86_32_PIC 1
INIT_X86_32_PIC_ %1, 1
%endmacro
; Same as INIT_X86_32_PIC, but the previous value of the register argument is
; not saved, so nothing is pushed and push_num is left unchanged.
%macro INIT_X86_32_PIC_NOPRESERVE 1
INIT_X86_32_PIC_ %1, 0
%endmacro
; Clean up after INIT_X86_32_PIC.
; Restore the register used to hold PC/EIP if applicable, and undefine defines.
; When the register was preserved it is popped and push_num is decremented.
; pic is undefined in both the PIC and the non-PIC configuration.
%macro DEINIT_X86_32_PIC 0
%ifdef X86_32_PICASM
%if pic_ptr_preserve
pop pic_ptr
%assign push_num push_num-1
%endif
%undef pic_ptr
%undef pic_ptr_preserve
%endif
%undef pic
%endmacro
; Same register restore as DEINIT_X86_32_PIC, but pic, pic_ptr and
; pic_ptr_preserve stay defined. Useful for functions with multiple epilogues.
; Note that push_num is still decremented on every expansion when the register
; was preserved.
%macro DEINIT_X86_32_PIC_KEEPDEF 0
%ifdef X86_32_PICASM
%if pic_ptr_preserve
pop pic_ptr
%assign push_num push_num-1
%endif
%endif
%endmacro

View File

@ -0,0 +1,212 @@
;*!
;* \copy
;* Copyright (c) 2009-2013, Cisco Systems
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;*
;* * Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;*
;* * Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in
;* the documentation and/or other materials provided with the
;* distribution.
;*
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;* POSSIBILITY OF SUCH DAMAGE.
;*
;*
;* cpu_mmx.asm
;*
;* Abstract
;* verify cpuid feature support and cpuid detection
;*
;* History
;* 04/29/2009 Created
;*
;*************************************************************************/
%include "asm_inc.asm"
;******************************************************************************************
; Macros
;******************************************************************************************
;******************************************************************************************
; Code
;******************************************************************************************
SECTION .text
; refer to "The IA-32 Intel(R) Architecture Software Developers Manual, Volume 2A A-M"
; section CPUID - CPU Identification
;******************************************************************************************
; int32_t WelsCPUIdVerify()
;
; Reports whether the CPUID instruction is available by testing whether the ID
; flag (bit 21, 00200000h) of the flags register can be toggled.
; Returns 00200000h (non-zero) when the bit could be changed, 0 otherwise.
; The caller's flags and r1 are restored before returning.
;
; The previous implementation XORed the flags value with 00200000h and then
; with itself without ever writing the modified value back to the flags
; register, so it returned 00200000h unconditionally.
;******************************************************************************************
WELS_EXTERN WelsCPUIdVerify
push r1 ; r1 is used as scratch; keep the caller's value
PUSHRFLAGS ; [r7] = caller's flags, restored at the end
PUSHRFLAGS
pop r1 ; r1 = current flags
xor r1, 00200000h ; flip the ID bit
push r1
POPRFLAGS ; try to commit the flipped ID bit
PUSHRFLAGS
pop r1 ; r1 = flags as the CPU actually kept them
mov eax, r1d
xor eax, [r7] ; bits that differ from the caller's saved flags
and eax, 00200000h ; keep only the ID bit: non-zero iff it could be toggled
POPRFLAGS ; restore the caller's flags
pop r1
ret
;****************************************************************************************************
; void WelsCPUId( int32_t uiIndex, int32_t *pFeatureA, int32_t *pFeatureB, int32_t *pFeatureC, int32_t *pFeatureD )
;
; Executes CPUID with EAX = uiIndex and ECX = *pFeatureC (the value is read
; from *pFeatureC before it is overwritten), then stores the resulting
; EAX, EBX, ECX and EDX to *pFeatureA, *pFeatureB, *pFeatureC and *pFeatureD.
; One variant per calling convention; at most one of them is assembled.
;****************************************************************************************************
%ifdef WIN64
; Win64: rcx = uiIndex, rdx = pFeatureA, r8 = pFeatureB, r9 = pFeatureC,
; pFeatureD is the fifth argument and lives on the stack.
WELS_EXTERN WelsCPUId
push rbx ; cpuid overwrites ebx; saved here and restored before ret
push rdx ; cpuid overwrites edx, which holds pFeatureA
mov eax, ecx ; leaf index
mov ecx, [r9] ; ecx input taken from *pFeatureC
cpuid
mov [r9], ecx ; *pFeatureC
mov [r8], ebx ; *pFeatureB
mov rcx, [rsp + 2*8 + 40] ; pFeatureD: skip the two pushes above, then offset 40 from the entry rsp
mov [rcx], edx ; *pFeatureD
pop rdx ; pFeatureA again
mov [rdx], eax ; *pFeatureA
pop rbx
ret
%elifdef UNIX64
; System V x86-64: rdi = uiIndex, rsi = pFeatureA, rdx = pFeatureB,
; rcx = pFeatureC, r8 = pFeatureD.
WELS_EXTERN WelsCPUId
push rbx ; cpuid overwrites ebx; saved here and restored before ret
push rcx ; cpuid overwrites ecx, which holds pFeatureC
push rdx ; cpuid overwrites edx, which holds pFeatureB
mov eax, edi ; leaf index
mov ecx, [rcx] ; ecx input taken from *pFeatureC
cpuid
mov [r8], edx ; *pFeatureD (r8 is reused as a scratch pointer below)
pop rdx ; pFeatureB
pop r8 ; pFeatureC
mov [r8], ecx ; *pFeatureC
mov [rdx], ebx ; *pFeatureB
mov [rsi], eax ; *pFeatureA
pop rbx
ret
%elifdef X86_32
; 32-bit: all arguments on the stack. After the two pushes below they are at
; esp+12 (uiIndex), +16 (pFeatureA), +20 (pFeatureB), +24 (pFeatureC), +28 (pFeatureD).
WELS_EXTERN WelsCPUId
push ebx
push edi
mov eax, [esp+12] ; operating index
mov edi, [esp+24] ; pFeatureC
mov ecx, [edi] ; ecx input taken from *pFeatureC
cpuid ; cpuid
; processing various information return
mov edi, [esp+16]
mov [edi], eax ; *pFeatureA
mov edi, [esp+20]
mov [edi], ebx ; *pFeatureB
mov edi, [esp+24]
mov [edi], ecx ; *pFeatureC
mov edi, [esp+28]
mov [edi], edx ; *pFeatureD
pop edi
pop ebx
ret
%endif
; Call this after executing CPUID leaf 1; pass the EAX and ECX values it returned.
;****************************************************************************************************
; int32_t WelsCPUSupportAVX( uint32_t eax, uint32_t ecx )
;
; Returns 1 when the ecx argument has both bits of mask 018000000H set
; (OSXSAVE and AVX) and XGETBV(0) reports bits 1 and 2 set (XMM and YMM state
; enabled by the OS); returns 0 otherwise.
;****************************************************************************************************
WELS_EXTERN WelsCPUSupportAVX
; Fetch the two arguments into eax/ecx for the active calling convention.
%ifdef WIN64
mov eax, ecx
mov ecx, edx
%elifdef UNIX64
mov eax, edi
mov ecx, esi
%else
mov eax, [esp+4]
mov ecx, [esp+8]
%endif
; Detection sequence as described in the Intel AVX documentation.
and ecx, 018000000H
cmp ecx, 018000000H ; OSXSAVE and AVX must both be reported
jne avx_unavailable
; XGETBV may be executed: select register 0 (XFEATURE_ENABLED_MASK).
xor ecx, ecx
XGETBV ; result in EDX:EAX
and eax, 06H
cmp eax, 06H ; XMM and YMM state must both be enabled by the OS
sete al ; al = 1 on a match, 0 otherwise (upper bits of eax are already 0)
movzx eax, al
ret
avx_unavailable:
xor eax, eax
ret
; Call this after executing CPUID leaf 1; pass the EAX and ECX values it returned.
;****************************************************************************************************
; int32_t WelsCPUSupportFMA( uint32_t eax, uint32_t ecx )
;
; Returns 1 when the ecx argument has all bits of mask 018001000H set
; (OSXSAVE, AVX and FMA) and XGETBV(0) reports bits 1 and 2 set (XMM and YMM
; state enabled by the OS); returns 0 otherwise.
;****************************************************************************************************
WELS_EXTERN WelsCPUSupportFMA
; Fetch the two arguments into eax/ecx for the active calling convention.
%ifdef WIN64
mov eax, ecx
mov ecx, edx
%elifdef UNIX64
mov eax, edi
mov ecx, esi
%else
mov eax, [esp+4]
mov ecx, [esp+8]
%endif
; Detection sequence as described in the Intel AVX documentation.
and ecx, 018001000H
cmp ecx, 018001000H ; OSXSAVE, AVX and FMA must all be reported
jne fma_unavailable
; XGETBV may be executed: select register 0 (XFEATURE_ENABLED_MASK).
xor ecx, ecx
XGETBV ; result in EDX:EAX
and eax, 06H
cmp eax, 06H ; XMM and YMM state must both be enabled by the OS
sete al ; al = 1 on a match, 0 otherwise (upper bits of eax are already 0)
movzx eax, al
ret
fma_unavailable:
xor eax, eax
ret
;******************************************************************************************
; void WelsEmms()
;
; Executes the EMMS instruction and returns. Takes no arguments and returns
; no value.
;******************************************************************************************
WELS_EXTERN WelsEmms
emms ; empty mmx technology states
ret

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,848 @@
;*!
;* \copy
;* Copyright (c) 2009-2013, Cisco Systems
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;*
;* * Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;*
;* * Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in
;* the documentation and/or other materials provided with the
;* distribution.
;*
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;* POSSIBILITY OF SUCH DAMAGE.
;*
;*
;* deblock.asm
;*
;* Abstract
;* edge loop
;*
;* History
;* 08/07/2009 Created
;*
;*
;*************************************************************************/
%include "asm_inc.asm"
;*******************************************************************************
; Macros and other preprocessor constants
;*******************************************************************************
; 16-byte constant tables used by the deblocking routines below.
; With X86_32_PICASM the tables are emitted into .text instead of .rodata.
; NOTE(review): presumably so that the pic() helper can address them as an
; offset from a code label in the same section - confirm.
%ifdef X86_32_PICASM
SECTION .text align=16
%else
SECTION .rodata align=16
%endif
ALIGN 16
; Eight words of 4 (not referenced by the code in this part of the file).
FOUR_16B_SSE2: dw 4, 4, 4, 4, 4, 4, 4, 4
ALIGN 16
; Sixteen bytes of 1.
WELS_DB1_16:
times 16 db 1
; Sixteen bytes of 127 (7Fh), the XOR mask used for unsigned byte compares.
WELS_DB127_16:
times 16 db 127
; Sixteen bytes of 96, the bias removed in SSE2_DeblockP0Q0_Lt4.
WELS_DB96_16:
times 16 db 96
; pshufb control that replicates each of source bytes 0..3 four times.
WELS_SHUFB0000111122223333:
times 4 db 0
times 4 db 1
times 4 db 2
times 4 db 3
SECTION .text
; Unsigned byte absolute difference.
; a=%1 b=%2 clobber=%3
; Subtract once in each direction with saturation and return the maximum.
; Result |a - b| is left in %1; %2 is only read; %3 is overwritten.
; One of the two saturated differences is always zero, so OR-ing them yields
; the non-zero one.
%macro SSE2_AbsDiffUB 3
movdqa %3, %2
psubusb %3, %1 ; saturated b - a
psubusb %1, %2 ; saturated a - b
por %1, %3
%endmacro
; Unsigned byte compare less than.
; lhs=%1 rhs^0x7f=%2 0x7f=%3
; No unsigned byte lt/gt compare instruction available; xor by 0x7f and use a
; signed compare. Some other options do exist. This one allows modifying the lhs
; without mov and uses a bitwise op which can be executed on most ports on
; common architectures.
; XOR with 0x7f maps an unsigned byte x to the signed value 127 - x, which
; reverses the ordering; hence "greater than" on the mapped values is
; "less than" on the originals. Result mask (FFh where lhs < rhs) is left in %1.
; The caller must pass the rhs already XORed with 0x7f.
%macro SSE2_CmpltUB 3
pxor %1, %3
pcmpgtb %1, %2
%endmacro
; Unsigned byte compare greater than or equal.
; lhs=%1 rhs=%2
; min(lhs, rhs) equals rhs exactly when lhs >= rhs. Result mask (FFh where
; lhs >= rhs) is left in %1; %2 is only read.
%macro SSE2_CmpgeUB 2
pminub %1, %2
pcmpeqb %1, %2
%endmacro
; Clip unsigned bytes to ref +/- diff.
; data=%1 ref=%2 maxdiff_from_ref=%3 clobber=%4
; Both bounds are computed with unsigned saturation, so they stay within
; 0..255. The clipped data is left in %1. %3 is overwritten with the upper
; bound and %4 with the lower bound; %2 is only read.
%macro SSE2_ClipUB 4
movdqa %4, %2
psubusb %4, %3 ; lower bound: ref - diff, saturated
paddusb %3, %2 ; upper bound: ref + diff, saturated
pmaxub %1, %4
pminub %1, %3
%endmacro
; (a + b + 1 - c) >> 1
; a=%1 b=%2 c=%3 [out:a^b&c]=%4
; pavgb rounds up; the rounding only changes the result when a + b is odd,
; i.e. when bit 0 of a ^ b is set, so subtracting (a ^ b) & c undoes the
; rounding wherever c has bit 0 set. Callers in this file pass c with only
; bit 0 possibly set in each byte.
; The average is left in %1 and (a ^ b) & c in %4; %2 and %3 are only read.
%macro SSE2_AvgbFloor1 4
movdqa %4, %1
pxor %4, %2
pavgb %1, %2
pand %4, %3
psubb %1, %4
%endmacro
; (a + b + carry) >> 1
; a=%1 b=%2 carry-1=%3
; %3 is FFh per byte for carry = 0 and 00h for carry = 1. XOR-ing both inputs
; and the result with FFh turns the round-up average into a round-down one;
; XOR-ing with 00h leaves the round-up average unchanged.
; The result is left in %1. %2 is left XORed with %3; %3 is only read.
%macro SSE2_AvgbFloor2 3
pxor %1, %3
pxor %2, %3
pavgb %1, %2
pxor %1, %3
%endmacro
; a = (a & m) | (b & ~m)
; a=%1 b=%2 m=%3
; Bitwise select: keeps a where the mask is set and takes b elsewhere.
; The result is left in %1. The mask register %3 is overwritten (it ends up
; holding b & ~m); %2 is only read.
%macro SSE2_Blend 3
pand %1, %3
pandn %3, %2
por %1, %3
%endmacro
; Compute
; p0 = clip(p0 + clip((q0 - p0 + ((p1 - q1) >> 2) + 1) >> 1, -iTc, iTc), 0, 255)
; q0 = clip(q0 - clip((q0 - p0 + ((p1 - q1) >> 2) + 1) >> 1, -iTc, iTc), 0, 255)
; 16-wide parallel in packed byte representation in xmm registers.
;
; p1=%1 p0=%2 q0=%3 q1=%4 iTc=%5 FFh=%6 xmmclobber=%7,%8
;
; On exit %2 and %3 hold the filtered p0 and q0. %1, %4, %6, %7 and %8 are
; overwritten; %5 is only read. Uses pic(), so on X86_32 PIC builds it must
; be expanded between the PIC init and deinit macros.
%macro SSE2_DeblockP0Q0_Lt4 8
; (q0 - p0 + ((p1 - q1) >> 2) + 1) >> 1 clipped to [-96, 159] and biased to [0, 255].
; A limited range is sufficient because the value is clipped to [-iTc, iTc] later.
; Bias so that unsigned saturation can be used.
; Get ((p1 - q1) >> 2) + 192 via a pxor and two pavgbs.
; q0 - p0 is split into a non-negative and non-positive part. The latter is
; subtracted from the biased value.
movdqa %7, %2
psubusb %7, %3 ; clip(p0 - q0, 0, 255)
; ((p1 - q1) >> 2) + 0xc0
pxor %4, %6 ; q1 ^ 0xff aka -q1 - 1 & 0xff
pavgb %1, %4 ; (((p1 - q1 + 0x100) >> 1)
pavgb %1, %6 ; + 0x100) >> 1
psubusb %1, %7 ; -= clip(p0 - q0, 0, 255) saturate.
movdqa %8, %3
psubusb %8, %2 ; (clip(q0 - p0, 0, 255)
pavgb %8, %1 ; + clip(((p1 - q1 + 0x300) >> 2) - clip(p0 - q0, 0, 255), 0, 255) + 1) >> 1
; Unbias and split into a non-negative and a non-positive part.
; Clip each part to iTc via minub.
; Add/subtract each part to/from p0/q0 and clip.
movdqa %6, [pic(WELS_DB96_16)]
psubusb %6, %8 ; magnitude of the negative part: max(96 - biased, 0)
psubusb %8, [pic(WELS_DB96_16)] ; positive part: max(biased - 96, 0)
pminub %6, %5
pminub %8, %5
psubusb %2, %6
paddusb %2, %8 ; p0
paddusb %3, %6
psubusb %3, %8 ; q0
%endmacro
;*******************************************************************************
; void DeblockLumaLt4V_ssse3(uint8_t * pPix, int32_t iStride, int32_t iAlpha,
; int32_t iBeta, int8_t * pTC)
;
; Filters 16 adjacent columns across the horizontal edge between rows
; pPix - iStride (p0) and pPix (q0). Reads rows p2..q2 and rewrites rows
; p1, p0, q0 and q1 in place. pTC supplies 4 signed bytes; each one applies to
; 4 consecutive columns and a negative value disables filtering for them.
; NOTE(review): line "pavgb xmm3, [r3 + 0 * r1]" uses a memory operand
; directly, which for non-VEX SSE requires the p0 row to be 16-byte aligned -
; confirm that callers guarantee this.
;*******************************************************************************
WELS_EXTERN DeblockLumaLt4V_ssse3
%assign push_num 0
INIT_X86_32_PIC r5
LOAD_5_PARA
PUSH_XMM 8
SIGN_EXTENSION r1, r1d
movd xmm1, arg3d ; iAlpha
movd xmm2, arg4d ; iBeta
pxor xmm3, xmm3 ; all-zero pshufb control: broadcast byte 0
pxor xmm1, [pic(WELS_DB127_16)]
pxor xmm2, [pic(WELS_DB127_16)]
pshufb xmm1, xmm3 ; iAlpha ^ 0x7f
pshufb xmm2, xmm3 ; iBeta ^ 0x7f
mov r2, r1 ; iStride
neg r1 ; -iStride
lea r3, [r0 + r1] ; pPix - iStride
; Compute masks to enable/disable deblocking.
MOVDQ xmm6, [r3 + 0 * r1] ; p0
MOVDQ xmm7, [r3 + 1 * r1] ; p1
MOVDQ xmm0, [r0 + 0 * r2] ; q0
movdqa xmm4, xmm6 ; keep an unmodified copy of p0
SSE2_AbsDiffUB xmm6, xmm0, xmm3 ; |p0 - q0|
SSE2_CmpltUB xmm6, xmm1, [pic(WELS_DB127_16)] ; bDeltaP0Q0 = |p0 - q0| < iAlpha
MOVDQ xmm1, [r0 + 1 * r2] ; q1
SSE2_AbsDiffUB xmm7, xmm4, xmm3 ; |p1 - p0|
SSE2_AbsDiffUB xmm0, xmm1, xmm3 ; |q1 - q0|
pmaxub xmm7, xmm0 ; max(|p1 - p0|, |q1 - q0|)
SSE2_CmpltUB xmm7, xmm2, [pic(WELS_DB127_16)] ; bDeltaP1P0 & bDeltaQ1Q0 = max(|p1 - p0|, |q1 - q0|) < iBeta
pand xmm6, xmm7 ; bDeltaP0Q0P1P0Q1Q0 = bDeltaP0Q0 & bDeltaP1P0 & bDeltaQ1Q0
MOVDQ xmm7, [r3 + 2 * r1] ; p2
movdqa xmm0, xmm7 ; keep an unmodified copy of p2
SSE2_AbsDiffUB xmm7, xmm4, xmm3 ; |p2 - p0|
SSE2_CmpltUB xmm7, xmm2, [pic(WELS_DB127_16)] ; bDeltaP2P0 = |p2 - p0| < iBeta
MOVDQ xmm5, [r0 + 2 * r2] ; q2
MOVDQ xmm3, [r0 + 0 * r2] ; q0
movdqa xmm1, xmm5 ; keep an unmodified copy of q2
SSE2_AbsDiffUB xmm5, xmm3, xmm4 ; |q2 - q0|
SSE2_CmpltUB xmm5, xmm2, [pic(WELS_DB127_16)] ; bDeltaQ2Q0 = |q2 - q0| < iBeta
pavgb xmm3, [r3 + 0 * r1] ; (p0 + q0 + 1) >> 1
pcmpeqw xmm2, xmm2 ; FFh
pxor xmm3, xmm2 ; complemented so that the pavgb below rounds down
; (p2 + ((p0 + q0 + 1) >> 1)) >> 1
pxor xmm0, xmm2
pavgb xmm0, xmm3
pxor xmm0, xmm2
; (q2 + ((p0 + q0 + 1) >> 1)) >> 1
pxor xmm1, xmm2
pavgb xmm1, xmm3
pxor xmm1, xmm2
movd xmm3, [r4] ; the 4 tc bytes from pTC
pshufb xmm3, [pic(WELS_SHUFB0000111122223333)] ; iTc
movdqa xmm4, xmm3 ; iTc0 = iTc
pcmpgtb xmm3, xmm2 ; iTc > -1 ? 0xff : 0x00
pand xmm6, xmm3 ; bDeltaP0Q0P1P0Q1Q0 &= iTc > -1
movdqa xmm3, xmm4
psubb xmm3, xmm7 ; iTc -= bDeltaP2P0 ? -1 : 0
psubb xmm3, xmm5 ; iTc -= bDeltaQ2Q0 ? -1 : 0
pand xmm3, xmm6 ; iTc &= bDeltaP0Q0P1P0Q1Q0 ? 0xff : 0
pand xmm7, xmm6 ; bDeltaP2P0 &= bDeltaP0Q0P1P0Q1Q0
pand xmm5, xmm6 ; bDeltaQ2Q0 &= bDeltaP0Q0P1P0Q1Q0
pand xmm7, xmm4 ; iTc0 & (bDeltaP2P0 ? 0xff : 0)
pand xmm5, xmm4 ; iTc0 & (bDeltaQ2Q0 ? 0xff : 0)
MOVDQ xmm4, [r3 + 1 * r1] ; p1
SSE2_ClipUB xmm0, xmm4, xmm7, xmm6 ; clip p1.
MOVDQ xmm6, [r0 + 1 * r2] ; q1
MOVDQ [r3 + 1 * r1], xmm0 ; store p1.
SSE2_ClipUB xmm1, xmm6, xmm5, xmm7 ; clip q1.
MOVDQ [r0 + 1 * r2], xmm1 ; store q1.
MOVDQ xmm1, [r3 + 0 * r1] ; p0
MOVDQ xmm0, [r0 + 0 * r2] ; q0
; xmm4/xmm6 still hold the original p1/q1; xmm3 = iTc; xmm2 = FFh.
SSE2_DeblockP0Q0_Lt4 xmm4, xmm1, xmm0, xmm6, xmm3, xmm2, xmm5, xmm7
MOVDQ [r3 + 0 * r1], xmm1 ; store p0.
MOVDQ [r0 + 0 * r2], xmm0 ; store q0.
POP_XMM
LOAD_5_PARA_POP
DEINIT_X86_32_PIC
ret
; Deblock 3x16 luma pixels for the eq4 case.
;
; Compose 8-bit averages from pavgbs. Ie. (p1 + p0 + p2 + q0 + 2) >> 2 can be
; written as (((p1 + p0) >> 1) + ((p2 + q0 + (p1 ^ p0 & 1)) >> 1) + 1) >> 1,
; which maps to 3 pavgbs.
;
; pPix=%1 iStride=%2 [in:q0,out:p0]=%3 [in:q1,out:p1]=%4 bDeltaP0Q0P1P0Q1Q0=%5 bDeltaP2P0=%6 clobber=%7,%8,%9,%10 preserve_p0p1=%11 db1=%12
;
; Processes one side of the edge: %1 points at that side's p0 row and %2 is the
; signed step from p0 to p1 (so p1 = [%1 + 1 * %2], p2 = [%1 + 2 * %2] and
; p3 = [%1 + 3 * %2] relative to the entry value of %1). Rows p0, p1 and p2 are
; rewritten in place. %1 is advanced by %2 once inside the macro and is not
; restored. With preserve_p0p1 non-zero the original p0 and p1 rows are left
; in %3 and %4 and %5 is kept intact; with zero, %5 is destroyed.
; %6 is destroyed in both cases. Several steps use [%1...] directly as an SSE
; memory operand, which requires those rows to be 16-byte aligned.
%macro SSE2_DeblockLumaEq4_3x16P 12
movdqa %7, %3
movdqa %8, %6
MOVDQ %10, [%1 + 1 * %2] ; p1
SSE2_Blend %7, %10, %8 ; t0 = bDeltaP2P0 ? q0 : p1
movdqa %8, %6
MOVDQ %9, [%1 + 2 * %2] ; p2
SSE2_Blend %9, %4, %8 ; t1 = bDeltaP2P0 ? p2 : q1
SSE2_AvgbFloor1 %4, %9, %12, %8 ; t1 = (t1 + q1) >> 1
SSE2_AvgbFloor1 %10, [%1], %12, %8 ; (p0 + p1) >> 1, p0 ^ p1
pxor %8, %12
SSE2_AvgbFloor1 %7, %4, %8, %9 ; (t0 + t1 + (p0 ^ p1 & 1)) >> 1
MOVDQ %9, [%1 + 2 * %2] ; p2
SSE2_AvgbFloor1 %3, %9, %8, %4 ; (p2 + q0 + (p0 ^ p1 & 1)) >> 1
pavgb %7, %10 ; p0' = (p0 + p1 + t0 + t1 + 2) >> 2
movdqa %8, %10
pxor %8, %3 ; (p0 + p1) >> 1 ^ (p2 + q0 + (p0 ^ p1 & 1)) >> 1
pand %8, %12 ; & 1
pavgb %10, %3 ; p1' = (p0 + p1 + p2 + q0 + 2) >> 2
pand %6, %5 ; bDeltaP2P0 &= bDeltaP0Q0P1P0Q1Q0
%if %11
MOVDQ %3, [%1 + 0 * %2] ; p0
movdqa %4, %5
SSE2_Blend %7, %3, %4 ; p0out = bDeltaP0Q0P1P0Q1Q0 ? p0' : p0
%else
SSE2_Blend %7, [%1 + 0 * %2], %5 ; p0out = bDeltaP0Q0P1P0Q1Q0 ? p0' : p0
%endif
MOVDQ [%1 + 0 * %2], %7 ; store p0
add %1, %2
movdqa %7, %10
psubb %10, %8 ; (p0 + p1 + p2 + q0) >> 2
psubb %8, %12
MOVDQ %4, [%1 + (3 - 1) * %2] ; p3
SSE2_AvgbFloor2 %4, %9, %8 ; (p2 + p3 + ((p0 + p1) >> 1 ^ (p2 + q0 + (p0 ^ p1 & 1)) >> 1 & 1)) >> 1
pavgb %10, %4 ; p2' = (((p0 + p1 + p2 + q0) >> 1) + p2 + p3 + 2) >> 2
movdqa %8, %6
SSE2_Blend %10, [%1 + (2 - 1) * %2], %8 ; p2out = bDeltaP2P0 ? p2' : p2
MOVDQ [%1 + (2 - 1) * %2], %10 ; store p2
%if %11
MOVDQ %4, [%1 + (1 - 1) * %2] ; p1
SSE2_Blend %7, %4, %6 ; p1out = bDeltaP2P0 ? p1' : p1
%else
SSE2_Blend %7, [%1 + (1 - 1) * %2], %6 ; p1out = bDeltaP2P0 ? p1' : p1
%endif
MOVDQ [%1 + (1 - 1) * %2], %7 ; store p1
%endmacro
;*******************************************************************************
; void DeblockLumaEq4V_ssse3(uint8_t * pPix, int32_t iStride, int32_t iAlpha,
; int32_t iBeta)
;
; Strong (eq4) filter for 16 adjacent columns across the horizontal edge
; between rows pPix - iStride (p0) and pPix (q0). Reads rows p3..q3 and
; rewrites rows p2, p1, p0, q0, q1 and q2 in place. The p side is processed
; first with the original p0/p1 preserved in registers, then the q side.
;*******************************************************************************
WELS_EXTERN DeblockLumaEq4V_ssse3
%assign push_num 0
INIT_X86_32_PIC r4
LOAD_4_PARA
PUSH_XMM 10
SIGN_EXTENSION r1, r1d
movd xmm1, arg3d ; iAlpha
movd xmm2, arg4d ; iBeta
shr r2, 2
add r2, 1
movd xmm3, r2d ; (iAlpha >> 2) + 1
pxor xmm4, xmm4 ; all-zero pshufb control: broadcast byte 0
pxor xmm1, [pic(WELS_DB127_16)]
pxor xmm2, [pic(WELS_DB127_16)]
pshufb xmm1, xmm4 ; iAlpha ^ 0x7f
pshufb xmm2, xmm4 ; iBeta ^ 0x7f
pshufb xmm3, xmm4 ; (iAlpha >> 2) + 1
mov r2, r1 ; iStride
neg r1 ; -iStride
lea r3, [r0 + r1] ; pPix - iStride
; Compute masks to enable/disable filtering.
MOVDQ xmm7, [r3 + 1 * r1] ; p1
MOVDQ xmm6, [r3 + 0 * r1] ; p0
MOVDQ xmm0, [r0 + 0 * r2] ; q0
movdqa xmm4, xmm6 ; keep an unmodified copy of p0
SSE2_AbsDiffUB xmm6, xmm0, xmm5 ; |p0 - q0|
SSE2_CmpgeUB xmm3, xmm6 ; |p0 - q0| < (iAlpha >> 2) + 2
SSE2_CmpltUB xmm6, xmm1, [pic(WELS_DB127_16)] ; bDeltaP0Q0 = |p0 - q0| < iAlpha
MOVDQ xmm1, [r0 + 1 * r2] ; q1
SSE2_AbsDiffUB xmm7, xmm4, xmm5 ; |p1 - p0|
SSE2_AbsDiffUB xmm0, xmm1, xmm5 ; |q1 - q0|
pmaxub xmm7, xmm0 ; max(|p1 - p0|, |q1 - q0|)
SSE2_CmpltUB xmm7, xmm2, [pic(WELS_DB127_16)] ; bDeltaP1P0 & bDeltaQ1Q0 = max(|p1 - p0|, |q1 - q0|) < iBeta
pand xmm6, xmm7 ; & bDeltaP0Q0
MOVDQ xmm7, [r3 + 2 * r1] ; p2
SSE2_AbsDiffUB xmm7, xmm4, xmm5 ; |p2 - p0|
SSE2_CmpltUB xmm7, xmm2, [pic(WELS_DB127_16)] ; bDeltaP2P0 = |p2 - p0| < iBeta
pand xmm7, xmm3 ; &= |p0 - q0| < (iAlpha >> 2) + 2
MOVDQ xmm0, [r0 + 0 * r2] ; q0
MOVDQ xmm5, [r0 + 2 * r2] ; q2
SSE2_AbsDiffUB xmm5, xmm0, xmm4 ; |q2 - q0|
SSE2_CmpltUB xmm5, xmm2, [pic(WELS_DB127_16)] ; bDeltaQ2Q0 = |q2 - q0| < iBeta
pand xmm5, xmm3 ; &= |p0 - q0| < (iAlpha >> 2) + 2
%ifdef X86_32
; Push xmm5 to free up one register. Align stack so as to ensure that failed
; store forwarding penalty cannot occur (up to ~50 cycles for 128-bit on IVB).
mov r2, esp ; r2 (iStride) is reused to remember the unaligned esp
sub esp, 16
and esp, -16
movdqa [esp], xmm5
; p side: base = pPix - iStride, step = -iStride.
SSE2_DeblockLumaEq4_3x16P r3, r1, xmm0, xmm1, xmm6, xmm7, xmm2, xmm3, xmm5, xmm4, 1, [pic(WELS_DB1_16)]
movdqa xmm5, [esp]
mov esp, r2
neg r1 ; back to +iStride, since r2 no longer holds it
; q side: base = pPix, step = +iStride; xmm0/xmm1 now hold the original p0/p1.
SSE2_DeblockLumaEq4_3x16P r0, r1, xmm0, xmm1, xmm6, xmm5, xmm2, xmm3, xmm7, xmm4, 0, [pic(WELS_DB1_16)]
%else
movdqa xmm9, [WELS_DB1_16]
; p side: base = pPix - iStride, step = -iStride.
SSE2_DeblockLumaEq4_3x16P r3, r1, xmm0, xmm1, xmm6, xmm7, xmm2, xmm3, xmm8, xmm4, 1, xmm9
; q side: base = pPix, step = +iStride; xmm0/xmm1 now hold the original p0/p1.
SSE2_DeblockLumaEq4_3x16P r0, r2, xmm0, xmm1, xmm6, xmm5, xmm2, xmm3, xmm7, xmm4, 0, xmm9
%endif
POP_XMM
LOAD_4_PARA_POP
DEINIT_X86_32_PIC
ret
; [out:p1,p0,q0,q1]=%1,%2,%3,%4 pPixCb=%5 pPixCr=%6 iStride=%7 3*iStride-1=%8 xmmclobber=%9,%10,%11
;
; Loads the 4 bytes starting at offset -2 from each of 8 consecutive Cb rows
; and 8 consecutive Cr rows and transposes them into four 16-byte vectors, one
; per pixel position. The byte order inside every output vector is
; cb lines 0,2,4,6, cr lines 0,2,4,6, cb lines 1,3,5,7, cr lines 1,3,5,7.
; Row 6 is addressed as 2 * (3*iStride-1), which equals 6*iStride - 2.
; Side effect: %5 and %6 are each advanced by one stride and not restored.
%macro SSE2_LoadCbCr_4x16H 11
movd %1, [%5 + 0 * %7 - 2] ; [p1,p0,q0,q1] cb line 0
movd %2, [%5 + 2 * %7 - 2] ; [p1,p0,q0,q1] cb line 2
punpcklbw %1, %2 ; [p1,p1,p0,p0,q0,q0,q1,q1] cb line 0,2
movd %2, [%5 + 4 * %7 - 2] ; [p1,p0,q0,q1] cb line 4
movd %9, [%5 + 2 * %8] ; [p1,p0,q0,q1] cb line 6
punpcklbw %2, %9 ; [p1,p1,p0,p0,q0,q0,q1,q1] cb line 4,6
punpcklwd %1, %2 ; [p1,p1,p1,p1,p0,p0,p0,p0,q0,q0,q0,q0,q1,q1,q1,q1] cb line 0,2,4,6
movd %2, [%6 + 0 * %7 - 2] ; [p1,p0,q0,q1] cr line 0
movd %9, [%6 + 2 * %7 - 2] ; [p1,p0,q0,q1] cr line 2
punpcklbw %2, %9 ; [p1,p1,p0,p0,q0,q0,q1,q1] cr line 0,2
movd %9, [%6 + 4 * %7 - 2] ; [p1,p0,q0,q1] cr line 4
movd %10, [%6 + 2 * %8] ; [p1,p0,q0,q1] cr line 6
punpcklbw %9, %10 ; [p1,p1,p0,p0,q0,q0,q1,q1] cr line 4,6
punpcklwd %2, %9 ; [p1,p1,p1,p1,p0,p0,p0,p0,q0,q0,q0,q0,q1,q1,q1,q1] cr line 0,2,4,6
add %5, %7 ; pPixCb += iStride
add %6, %7 ; pPixCr += iStride
movd %9, [%5 + 0 * %7 - 2] ; [p1,p0,q0,q1] cb line 1
movd %10, [%5 + 2 * %7 - 2] ; [p1,p0,q0,q1] cb line 3
punpcklbw %9, %10 ; [p1,p1,p0,p0,q0,q0,q1,q1] cb line 1,3
movd %10, [%5 + 4 * %7 - 2] ; [p1,p0,q0,q1] cb line 5
movd %3, [%5 + 2 * %8] ; [p1,p0,q0,q1] cb line 7
punpcklbw %10, %3 ; [p1,p1,p0,p0,q0,q0,q1,q1] cb line 5,7
punpcklwd %9, %10 ; [p1,p1,p1,p1,p0,p0,p0,p0,q0,q0,q0,q0,q1,q1,q1,q1] cb line 1,3,5,7
movd %10, [%6 + 0 * %7 - 2] ; [p1,p0,q0,q1] cr line 1
movd %3, [%6 + 2 * %7 - 2] ; [p1,p0,q0,q1] cr line 3
punpcklbw %10, %3 ; [p1,p1,p0,p0,q0,q0,q1,q1] cr line 1,3
movd %3, [%6 + 4 * %7 - 2] ; [p1,p0,q0,q1] cr line 5
movd %4, [%6 + 2 * %8] ; [p1,p0,q0,q1] cr line 7
punpcklbw %3, %4 ; [p1,p1,p0,p0,q0,q0,q1,q1] cr line 5,7
punpcklwd %10, %3 ; [p1,p1,p1,p1,p0,p0,p0,p0,q0,q0,q0,q0,q1,q1,q1,q1] cr line 1,3,5,7
movdqa %3, %1
punpckldq %1, %2 ; [p1,p1,p1,p1,p1,p1,p1,p1,p0,p0,p0,p0,p0,p0,p0,p0] cb/cr line 0,2,4,6
punpckhdq %3, %2 ; [q0,q0,q0,q0,q0,q0,q0,q0,q1,q1,q1,q1,q1,q1,q1,q1] cb/cr line 0,2,4,6
movdqa %11, %9
punpckldq %9, %10 ; [p1,p1,p1,p1,p1,p1,p1,p1,p0,p0,p0,p0,p0,p0,p0,p0] cb/cr line 1,3,5,7
punpckhdq %11, %10 ; [q0,q0,q0,q0,q0,q0,q0,q0,q1,q1,q1,q1,q1,q1,q1,q1] cb/cr line 1,3,5,7
movdqa %2, %1
punpcklqdq %1, %9 ; [p1,p1,p1,p1,p1,p1,p1,p1,p1,p1,p1,p1,p1,p1,p1,p1] cb/cr line 0,2,4,6,1,3,5,7
punpckhqdq %2, %9 ; [p0,p0,p0,p0,p0,p0,p0,p0,p0,p0,p0,p0,p0,p0,p0,p0] cb/cr line 0,2,4,6,1,3,5,7
movdqa %4, %3
punpcklqdq %3, %11 ; [q0,q0,q0,q0,q0,q0,q0,q0,q0,q0,q0,q0,q0,q0,q0,q0] cb/cr line 0,2,4,6,1,3,5,7
punpckhqdq %4, %11 ; [q1,q1,q1,q1,q1,q1,q1,q1,q1,q1,q1,q1,q1,q1,q1,q1] cb/cr line 0,2,4,6,1,3,5,7
%endmacro
; pPixCb+iStride=%1 pPixCr+iStride=%2 iStride=%3 3*iStride-1=%4 p0=%5 q0=%6 rclobber=%7 dwclobber={%8,%9} xmmclobber=%10
;
; Counterpart of SSE2_LoadCbCr_4x16H: writes the filtered p0/q0 byte pairs
; back to offset -1 of 8 Cb rows and 8 Cr rows. The p0 and q0 vectors are
; interleaved, spilled to a 32-byte aligned scratch area below the stack
; pointer, and then copied out 16 bits at a time through a general-purpose
; register. %8 and %9 must name the 32-bit and 16-bit views of the same
; register (callers in this file pass r4d and r4w). Row 6 is addressed as
; 2 * (3*iStride-1) + 1, which equals 6*iStride - 1.
; Side effects: %1 and %2 are each moved back by one stride; %5, %7, %8 and
; %10 are overwritten; the stack pointer is restored before the macro ends.
%macro SSE2_StoreCbCr_4x16H 10
movdqa %10, %5
punpcklbw %10, %6 ; [p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0] cb/cr line 0,2,4,6
punpckhbw %5, %6 ; [p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0] cb/cr line 1,3,5,7
mov %7, r7 ; preserve stack pointer
and r7, -16 ; align stack pointer
sub r7, 32 ; allocate stack space
movdqa [r7 ], %10 ; store [p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0] cb/cr line 0,2,4,6 on the stack
movdqa [r7 + 16], %5 ; store [p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0,p0,q0] cb/cr line 1,3,5,7 on the stack
mov %8, [r7 + 16] ; [p0,q0,p0,q0] cb line 1,3
mov [%1 + 0 * %3 - 1], %9 ; store [p0,q0] cb line 1
shr %8, 16 ; [p0,q0] cb line 3
mov [%1 + 2 * %3 - 1], %9 ; store [p0,q0] cb line 3
mov %8, [r7 + 20] ; [p0,q0,p0,q0] cb line 5,7
mov [%1 + 4 * %3 - 1], %9 ; store [p0,q0] cb line 5
shr %8, 16 ; [p0,q0] cb line 7
mov [%1 + 2 * %4 + 1], %9 ; store [p0,q0] cb line 7
mov %8, [r7 + 24] ; [p0,q0,p0,q0] cr line 1,3
mov [%2 + 0 * %3 - 1], %9 ; store [p0,q0] cr line 1
shr %8, 16 ; [p0,q0] cr line 3
mov [%2 + 2 * %3 - 1], %9 ; store [p0,q0] cr line 3
mov %8, [r7 + 28] ; [p0,q0,p0,q0] cr line 5,7
mov [%2 + 4 * %3 - 1], %9 ; store [p0,q0] cr line 5
shr %8, 16 ; [p0,q0] cr line 7
mov [%2 + 2 * %4 + 1], %9 ; store [p0,q0] cr line 7
sub %1, %3 ; pPixCb -= iStride
sub %2, %3 ; pPixCr -= iStride
mov %8, [r7 ] ; [p0,q0,p0,q0] cb line 0,2
mov [%1 + 0 * %3 - 1], %9 ; store [p0,q0] cb line 0
shr %8, 16 ; [p0,q0] cb line 2
mov [%1 + 2 * %3 - 1], %9 ; store [p0,q0] cb line 2
mov %8, [r7 + 4] ; [p0,q0,p0,q0] cb line 4,6
mov [%1 + 4 * %3 - 1], %9 ; store [p0,q0] cb line 4
shr %8, 16 ; [p0,q0] cb line 6
mov [%1 + 2 * %4 + 1], %9 ; store [p0,q0] cb line 6
mov %8, [r7 + 8] ; [p0,q0,p0,q0] cr line 0,2
mov [%2 + 0 * %3 - 1], %9 ; store [p0,q0] cr line 0
shr %8, 16 ; [p0,q0] cr line 2
mov [%2 + 2 * %3 - 1], %9 ; store [p0,q0] cr line 2
mov %8, [r7 + 12] ; [p0,q0,p0,q0] cr line 4,6
mov [%2 + 4 * %3 - 1], %9 ; store [p0,q0] cr line 4
shr %8, 16 ; [p0,q0] cr line 6
mov [%2 + 2 * %4 + 1], %9 ; store [p0,q0] cr line 6
mov r7, %7 ; restore stack pointer
%endmacro
; p1=%1 p0=%2 q0=%3 q1=%4 iAlpha=%5 iBeta=%6 pTC=%7 xmmclobber=%8,%9,%10 interleaveTC=%11
;
; Normal-strength (lt4) chroma filter on 16 packed pixels. Builds the
; "do not filter" mask from the iAlpha and iBeta thresholds, zeroes iTc where
; that mask is set or where iTc is negative, and hands over to
; SSE2_DeblockP0Q0_Lt4, which leaves the filtered p0 and q0 in %2 and %3.
; iAlpha (%5) must already be broadcast to all bytes; iBeta (%6) is a 32-bit
; source that is broadcast here. Four tc bytes are read from [%7]:
; with interleaveTC non-zero each is duplicated to 2 adjacent bytes and the
; 8-byte result is repeated in both halves; with zero the 4 bytes are repeated
; four times.
; %1, %4, %5, %8, %9 and %10 are overwritten. Uses pic() via the nested macro.
%macro SSSE3_DeblockChromaLt4 11
movdqa %8, %3
SSE2_AbsDiffUB %8, %2, %9 ; |p0 - q0|
SSE2_CmpgeUB %8, %5 ; !bDeltaP0Q0 = |p0 - q0| >= iAlpha
movdqa %9, %4
SSE2_AbsDiffUB %9, %3, %5 ; |q1 - q0|
movdqa %10, %1
SSE2_AbsDiffUB %10, %2, %5 ; |p1 - p0|
pmaxub %9, %10 ; max(|q1 - q0|, |p1 - p0|)
pxor %10, %10
movd %5, %6
pshufb %5, %10 ; iBeta
SSE2_CmpgeUB %9, %5 ; !bDeltaQ1Q0 | !bDeltaP1P0 = max(|q1 - q0|, |p1 - p0|) >= iBeta
por %8, %9 ; | !bDeltaP0Q0
movd %5, [%7]
%if %11
punpckldq %5, %5
punpcklbw %5, %5 ; iTc
%else
pshufd %5, %5, 0 ; iTc
%endif
pcmpeqw %10, %10 ; FFh
movdqa %9, %5
pcmpgtb %9, %10 ; iTc > -1 ? FFh : 00h
pandn %8, %5 ; iTc & bDeltaP0Q0 & bDeltaP1P0 & bDeltaQ1Q0
pand %8, %9 ; &= (iTc > -1 ? FFh : 00h)
SSE2_DeblockP0Q0_Lt4 %1, %2, %3, %4, %8, %10, %5, %9
%endmacro
; p1=%1 p0=%2 q0=%3 q1=%4 iAlpha=%5 iBeta=%6 xmmclobber=%7,%8,%9
;
; Strong (eq4) chroma filter on 16 packed pixels. Where the threshold tests
; pass, p0 is replaced by (p1 + ((p0 + q1) >> 1) + 1) >> 1 and q0 by
; (q1 + ((q0 + p1) >> 1) + 1) >> 1; elsewhere both are left unchanged.
; Results are left in %2 and %3. iAlpha (%5) must already be broadcast to all
; bytes; iBeta (%6) is a 32-bit source that is broadcast here.
; The mask built here is the negated condition, so the blends select the
; original pixel where the mask is set.
; %1, %5, %7, %8 and %9 are overwritten; %4 is only read. Uses no memory
; constants, so no PIC setup is required.
%macro SSSE3_DeblockChromaEq4 9
movdqa %7, %3
SSE2_AbsDiffUB %7, %2, %8 ; |p0 - q0|
SSE2_CmpgeUB %7, %5 ; !bDeltaP0Q0 = |p0 - q0| >= iAlpha
movdqa %8, %4
SSE2_AbsDiffUB %8, %3, %5 ; |q1 - q0|
movdqa %9, %1
SSE2_AbsDiffUB %9, %2, %5 ; |p1 - p0|
pmaxub %8, %9 ; max(|q1 - q0|, |p1 - p0|)
pxor %9, %9
movd %5, %6
pshufb %5, %9 ; iBeta
SSE2_CmpgeUB %8, %5 ; !bDeltaQ1Q0 | !bDeltaP1P0 = max(|q1 - q0|, |p1 - p0|) >= iBeta
por %7, %8 ; !bDeltaP0Q0P1P0Q1Q0 = !bDeltaP0Q0 | !bDeltaQ1Q0 | !bDeltaP1P0
WELS_DB1 %5
movdqa %8, %2
SSE2_AvgbFloor1 %8, %4, %5, %9 ; (p0 + q1) >> 1
pavgb %8, %1 ; p0' = (p1 + ((p0 + q1) >> 1) + 1) >> 1
movdqa %9, %7
SSE2_Blend %2, %8, %7 ; p0out = bDeltaP0Q0P1P0Q1Q0 ? p0' : p0
SSE2_AvgbFloor1 %1, %3, %5, %7 ; (q0 + p1) >> 1
pavgb %1, %4 ; q0' = (q1 + ((q0 + p1) >> 1) + 1) >> 1
SSE2_Blend %3, %1, %9 ; q0out = bDeltaP0Q0P1P0Q1Q0 ? q0' : q0
%endmacro
;******************************************************************************
; void DeblockChromaLt4V_ssse3(uint8_t * pPixCb, uint8_t * pPixCr, int32_t iStride,
; int32_t iAlpha, int32_t iBeta, int8_t * pTC);
;
; Normal-strength filter across the horizontal edge above pPixCb / pPixCr.
; Eight Cb pixels occupy the low half and eight Cr pixels the high half of
; each vector. Reads rows p1, p0, q0 and q1 and rewrites rows p0 and q0 of
; both planes in place.
;*******************************************************************************
WELS_EXTERN DeblockChromaLt4V_ssse3
%assign push_num 0
INIT_X86_32_PIC r4
LOAD_4_PARA
PUSH_XMM 8
SIGN_EXTENSION r2, r2d
movd xmm7, arg4d
pxor xmm0, xmm0
pshufb xmm7, xmm0 ; iAlpha
mov r3, r2
neg r3 ; -iStride
movq xmm0, [r0 + 0 * r2] ; q0 cb
movhps xmm0, [r1 + 0 * r2] ; q0 cr
movq xmm2, [r0 + 1 * r3] ; p0 cb
movhps xmm2, [r1 + 1 * r3] ; p0 cr
movq xmm1, [r0 + 1 * r2] ; q1 cb
movhps xmm1, [r1 + 1 * r2] ; q1 cr
movq xmm3, [r0 + 2 * r3] ; p1 cb
movhps xmm3, [r1 + 2 * r3] ; p1 cr
; When arg6 is already the register r5 it is used directly as pTC; otherwise
; it is first loaded into r2. iStride in r2 is no longer needed at that
; point because the stores below address rows through r3 only.
%ifidni arg6, r5
SSSE3_DeblockChromaLt4 xmm3, xmm2, xmm0, xmm1, xmm7, arg5d, arg6, xmm4, xmm5, xmm6, 1
%else
mov r2, arg6
SSSE3_DeblockChromaLt4 xmm3, xmm2, xmm0, xmm1, xmm7, arg5d, r2, xmm4, xmm5, xmm6, 1
%endif
movlps [r0 + 1 * r3], xmm2 ; store p0 cb
movhps [r1 + 1 * r3], xmm2 ; store p0 cr
movlps [r0 ], xmm0 ; store q0 cb
movhps [r1 ], xmm0 ; store q0 cr
POP_XMM
LOAD_4_PARA_POP
DEINIT_X86_32_PIC
ret
;********************************************************************************
; void DeblockChromaEq4V_ssse3(uint8_t * pPixCb, uint8_t * pPixCr, int32_t iStride,
; int32_t iAlpha, int32_t iBeta)
;
; Strong filter across the horizontal edge above pPixCb / pPixCr. Eight Cb
; pixels occupy the low half and eight Cr pixels the high half of each vector.
; Reads rows p1, p0, q0 and q1 and rewrites rows p0 and q0 of both planes in
; place. No PIC setup is done here because the filter macro used does not
; reference memory constants.
;********************************************************************************
WELS_EXTERN DeblockChromaEq4V_ssse3
%assign push_num 0
LOAD_4_PARA
PUSH_XMM 8
SIGN_EXTENSION r2, r2d
movd xmm7, arg4d
pxor xmm0, xmm0
pshufb xmm7, xmm0 ; iAlpha
mov r3, r2
neg r3 ; -iStride
movq xmm0, [r0 + 0 * r2] ; q0 cb
movhps xmm0, [r1 + 0 * r2] ; q0 cr
movq xmm2, [r0 + 1 * r3] ; p0 cb
movhps xmm2, [r1 + 1 * r3] ; p0 cr
movq xmm1, [r0 + 1 * r2] ; q1 cb
movhps xmm1, [r1 + 1 * r2] ; q1 cr
movq xmm3, [r0 + 2 * r3] ; p1 cb
movhps xmm3, [r1 + 2 * r3] ; p1 cr
SSSE3_DeblockChromaEq4 xmm3, xmm2, xmm0, xmm1, xmm7, arg5d, xmm4, xmm5, xmm6
movlps [r0 + 1 * r3], xmm2 ; store p0 cb
movhps [r1 + 1 * r3], xmm2 ; store p0 cr
movlps [r0 + 0 * r2], xmm0 ; store q0 cb
movhps [r1 + 0 * r2], xmm0 ; store q0 cr
POP_XMM
LOAD_4_PARA_POP
ret
;*******************************************************************************
; void DeblockChromaLt4H_ssse3(uint8_t * pPixCb, uint8_t * pPixCr, int32_t iStride,
; int32_t iAlpha, int32_t iBeta, int8_t * pTC);
;
; Normal-strength filter across the vertical edge left of pPixCb / pPixCr for
; 8 rows of each plane. Bytes at offsets -2..+1 of every row are loaded and
; transposed, filtered, and the p0/q0 pairs are written back to offsets -1
; and 0 of every row.
;*******************************************************************************
WELS_EXTERN DeblockChromaLt4H_ssse3
%assign push_num 0
LOAD_6_PARA
PUSH_XMM 8
SIGN_EXTENSION r2, r2d
movd xmm7, arg4d
pxor xmm0, xmm0
pshufb xmm7, xmm0 ; iAlpha
lea r3, [3 * r2 - 1] ; 3 * iStride - 1
SSE2_LoadCbCr_4x16H xmm0, xmm1, xmm4, xmm5, r0, r1, r2, r3, xmm2, xmm3, xmm6
; PIC is set up only around the filter step. r1 (pPixCr) is borrowed as the
; reference register; the init macro pushes it and the deinit macro pops it,
; so it is valid again for the store below.
INIT_X86_32_PIC r1
SSSE3_DeblockChromaLt4 xmm0, xmm1, xmm4, xmm5, xmm7, arg5d, r5, xmm2, xmm3, xmm6, 0
DEINIT_X86_32_PIC
; pTC in r5 is no longer needed, so r5 is reused to hold the saved stack
; pointer and r4 as the scratch register inside the store macro.
SSE2_StoreCbCr_4x16H r0, r1, r2, r3, xmm1, xmm4, r5, r4d, r4w, xmm0
POP_XMM
LOAD_6_PARA_POP
ret
;***************************************************************************
; void DeblockChromaEq4H_ssse3(uint8_t * pPixCb, uint8_t * pPixCr, int32_t iStride,
; int32_t iAlpha, int32_t iBeta)
;
; Strong filter across the vertical edge left of pPixCb / pPixCr for 8 rows of
; each plane. Bytes at offsets -2..+1 of every row are loaded and transposed,
; filtered, and the p0/q0 pairs are written back to offsets -1 and 0 of every
; row.
;***************************************************************************
WELS_EXTERN DeblockChromaEq4H_ssse3
%assign push_num 0
LOAD_4_PARA
PUSH_XMM 8
SIGN_EXTENSION r2, r2d
movd xmm7, arg4d
pxor xmm0, xmm0
pshufb xmm7, xmm0 ; iAlpha
lea r3, [3 * r2 - 1] ; 3 * iStride - 1
SSE2_LoadCbCr_4x16H xmm0, xmm1, xmm4, xmm5, r0, r1, r2, r3, xmm2, xmm3, xmm6
SSSE3_DeblockChromaEq4 xmm0, xmm1, xmm4, xmm5, xmm7, arg5d, xmm2, xmm3, xmm6
; The store macro overwrites r4 and r5. Only four parameters were loaded, so
; on X86_32 the two registers are saved and restored around it.
; NOTE(review): presumably because LOAD_4_PARA does not preserve r4/r5 on
; X86_32 while they must survive the call - confirm against asm_inc.asm.
%ifdef X86_32
push r4
push r5
SSE2_StoreCbCr_4x16H r0, r1, r2, r3, xmm1, xmm4, r5, r4d, r4w, xmm0
pop r5
pop r4
%else
SSE2_StoreCbCr_4x16H r0, r1, r2, r3, xmm1, xmm4, r5, r4d, r4w, xmm0
%endif
POP_XMM
LOAD_4_PARA_POP
ret
;********************************************************************************
;
; void DeblockLumaTransposeH2V_sse2(uint8_t * pPixY, int32_t iStride, uint8_t * pDst);
;
; Reads 8 bytes from each of 16 consecutive rows starting at pPixY (row pitch
; iStride), transposes them, and writes eight 16-byte rows to pDst at offsets
; 00h..70h. Row n and row n+8 are combined into the low and high halves of one
; register before the transpose.
; pDst is written with movdqa and therefore must be 16-byte aligned.
; A 16-byte aligned scratch slot is carved out below the stack pointer for the
; transpose macro; the original stack pointer is kept in r5.
;
;********************************************************************************
WELS_EXTERN DeblockLumaTransposeH2V_sse2
push r3
push r4
push r5
%assign push_num 3
LOAD_3_PARA
PUSH_XMM 8
SIGN_EXTENSION r1, r1d
mov r5, r7 ; remember the stack pointer
mov r3, r7
and r3, 0Fh
sub r7, r3 ; round the stack pointer down to a multiple of 16
sub r7, 10h ; reserve one 16-byte scratch slot at [r7]
lea r3, [r0 + r1 * 8] ; row 8
lea r4, [r1 * 3] ; 3 * iStride
movq xmm0, [r0] ; rows 0 and 8
movq xmm7, [r3]
punpcklqdq xmm0, xmm7
movq xmm1, [r0 + r1] ; rows 1 and 9
movq xmm7, [r3 + r1]
punpcklqdq xmm1, xmm7
movq xmm2, [r0 + r1*2] ; rows 2 and 10
movq xmm7, [r3 + r1*2]
punpcklqdq xmm2, xmm7
movq xmm3, [r0 + r4] ; rows 3 and 11
movq xmm7, [r3 + r4]
punpcklqdq xmm3, xmm7
lea r0, [r0 + r1 * 4] ; advance both row pointers by 4 rows
lea r3, [r3 + r1 * 4]
movq xmm4, [r0] ; rows 4 and 12
movq xmm7, [r3]
punpcklqdq xmm4, xmm7
movq xmm5, [r0 + r1] ; rows 5 and 13
movq xmm7, [r3 + r1]
punpcklqdq xmm5, xmm7
movq xmm6, [r0 + r1*2] ; rows 6 and 14
movq xmm7, [r3 + r1*2]
punpcklqdq xmm6, xmm7
movdqa [r7], xmm0 ; park xmm0 so it can serve as a temporary
movq xmm7, [r0 + r4] ; rows 7 and 15
movq xmm0, [r3 + r4]
punpcklqdq xmm7, xmm0
movdqa xmm0, [r7]
SSE2_TransTwo8x8B xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, [r7]
;pOut: m5, m3, m4, m8, m6, m2, m7, m1
; (1-based register numbers: output rows are in xmm4, xmm2, xmm3, xmm7,
; xmm5, xmm1, xmm6, xmm0, which is the order stored below)
movdqa [r2], xmm4
movdqa [r2 + 10h], xmm2
movdqa [r2 + 20h], xmm3
movdqa [r2 + 30h], xmm7
movdqa [r2 + 40h], xmm5
movdqa [r2 + 50h], xmm1
movdqa [r2 + 60h], xmm6
movdqa [r2 + 70h], xmm0
mov r7, r5 ; restore the stack pointer
POP_XMM
pop r5
pop r4
pop r3
ret
;*******************************************************************************************
;
; void DeblockLumaTransposeV2H_sse2(uint8_t * pPixY, int32_t iStride, uint8_t * pSrc);
;
; Inverse of DeblockLumaTransposeH2V_sse2: reads eight 16-byte rows from pSrc
; (offsets 00h..70h), transposes them, and writes 8 bytes to each of 16
; consecutive rows starting at pPixY (row pitch iStride). The low halves of
; the transposed registers go to rows 0..7 and the high halves to rows 8..15.
; pSrc is read with movdqa and therefore must be 16-byte aligned.
; A 16-byte aligned scratch slot is carved out below the stack pointer for the
; transpose macro; the original stack pointer is kept in r4.
;
;*******************************************************************************************
WELS_EXTERN DeblockLumaTransposeV2H_sse2
push r3
push r4
%assign push_num 2
LOAD_3_PARA
PUSH_XMM 8
SIGN_EXTENSION r1, r1d
mov r4, r7 ; remember the stack pointer
mov r3, r7
and r3, 0Fh
sub r7, r3 ; round the stack pointer down to a multiple of 16
sub r7, 10h ; reserve one 16-byte scratch slot at [r7]
movdqa xmm0, [r2]
movdqa xmm1, [r2 + 10h]
movdqa xmm2, [r2 + 20h]
movdqa xmm3, [r2 + 30h]
movdqa xmm4, [r2 + 40h]
movdqa xmm5, [r2 + 50h]
movdqa xmm6, [r2 + 60h]
movdqa xmm7, [r2 + 70h]
SSE2_TransTwo8x8B xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, [r7]
;pOut: m5, m3, m4, m8, m6, m2, m7, m1
; (1-based register numbers: output rows are in xmm4, xmm2, xmm3, xmm7,
; xmm5, xmm1, xmm6, xmm0, which is the order stored below)
lea r2, [r1 * 3] ; 3 * iStride; pSrc is no longer needed
; Rows 0..7 from the low 8 bytes of each register.
movq [r0], xmm4
movq [r0 + r1], xmm2
movq [r0 + r1*2], xmm3
movq [r0 + r2], xmm7
lea r0, [r0 + r1*4]
movq [r0], xmm5
movq [r0 + r1], xmm1
movq [r0 + r1*2], xmm6
movq [r0 + r2], xmm0
; Move the high 8 bytes of each register down for rows 8..15.
psrldq xmm4, 8
psrldq xmm2, 8
psrldq xmm3, 8
psrldq xmm7, 8
psrldq xmm5, 8
psrldq xmm1, 8
psrldq xmm6, 8
psrldq xmm0, 8
lea r0, [r0 + r1*4]
movq [r0], xmm4
movq [r0 + r1], xmm2
movq [r0 + r1*2], xmm3
movq [r0 + r2], xmm7
lea r0, [r0 + r1*4]
movq [r0], xmm5
movq [r0 + r1], xmm1
movq [r0 + r1*2], xmm6
movq [r0 + r2], xmm0
mov r7, r4 ; restore the stack pointer
POP_XMM
pop r4
pop r3
ret
; Clamps each of the 24 unsigned bytes at the address in the first argument
; to at most 1, in place: zero bytes stay 0, every non-zero byte becomes 1.
; The buffer may be unaligned.
WELS_EXTERN WelsNonZeroCount_sse2
%assign push_num 0
LOAD_1_PARA
; Build sixteen bytes of 01h in xmm2.
pcmpeqw xmm2, xmm2
psrlw xmm2, 15
packuswb xmm2, xmm2
; Bytes 16..23, then bytes 0..15.
movq xmm1, [r0+16]
movdqu xmm0, [r0]
pminub xmm1, xmm2
pminub xmm0, xmm2
movq [r0+16], xmm1
movdqu [r0], xmm0
ret

View File

@ -0,0 +1,728 @@
;*!
;* \copy
;* Copyright (c) 2009-2013, Cisco Systems
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;*
;* * Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;*
;* * Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in
;* the documentation and/or other materials provided with the
;* distribution.
;*
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;* POSSIBILITY OF SUCH DAMAGE.
;*
;*
;* expand_picture.asm
;*
;* Abstract
;* mmxext/sse for expand_frame
;*
;* History
;* 09/25/2009 Created
;*
;*
;*************************************************************************/
%include "asm_inc.asm"
;***********************************************************************
; Macros and other preprocessor constants
;***********************************************************************
;***********************************************************************
; Code
;***********************************************************************
SECTION .text
;;;;;;;expanding result;;;;;;;
;aaaa|attttttttttttttttb|bbbb
;aaaa|attttttttttttttttb|bbbb
;aaaa|attttttttttttttttb|bbbb
;aaaa|attttttttttttttttb|bbbb
;----------------------------
;aaaa|attttttttttttttttb|bbbb
;llll|l r|rrrr
;llll|l r|rrrr
;llll|l r|rrrr
;llll|l r|rrrr
;llll|l r|rrrr
;cccc|ceeeeeeeeeeeeeeeed|dddd
;----------------------------
;cccc|ceeeeeeeeeeeeeeeed|dddd
;cccc|ceeeeeeeeeeeeeeeed|dddd
;cccc|ceeeeeeeeeeeeeeeed|dddd
;cccc|ceeeeeeeeeeeeeeeed|dddd
; Stores the 8-byte register %3 to four consecutive rows starting at [%1]
; (row pitch %2) and leaves %1 advanced by 4*%2, i.e. on the next row to fill.
%macro mov_line_8x4_mmx 3 ; dst, stride, mm?
movq [%1], %3
movq [%1+%2], %3
lea %1, [%1+2*%2]
movq [%1], %3
movq [%1+%2], %3
lea %1, [%1+2*%2]
%endmacro
; Same four 8-byte row stores as mov_line_8x4_mmx, but the final step advances
; %1 by only one pitch, so %1 ends up ON the last row written (start + 3*%2)
; rather than one row past it.
%macro mov_line_end8x4_mmx 3 ; dst, stride, mm?
movq [%1], %3
movq [%1+%2], %3
lea %1, [%1+2*%2]
movq [%1], %3
movq [%1+%2], %3
lea %1, [%1+%2]
%endmacro
; Stores the 16-byte register %3 to four consecutive rows starting at [%1]
; (row pitch %2) and leaves %1 advanced by 4*%2.
; %4 is pasted after "movdq": pass a for movdqa (aligned) or u for movdqu.
%macro mov_line_16x4_sse2 4 ; dst, stride, xmm?, u/a
movdq%4 [%1], %3 ; top(bottom)_0
movdq%4 [%1+%2], %3 ; top(bottom)_1
lea %1, [%1+2*%2]
movdq%4 [%1], %3 ; top(bottom)_2
movdq%4 [%1+%2], %3 ; top(bottom)_3
lea %1, [%1+2*%2]
%endmacro
; Same four 16-byte row stores as mov_line_16x4_sse2, but the final step
; advances %1 by only one pitch, so %1 ends up ON the last row written
; (start + 3*%2). %4 selects movdqa (a) or movdqu (u).
%macro mov_line_end16x4_sse2 4 ; dst, stride, xmm?, u/a
movdq%4 [%1], %3 ; top(bottom)_0
movdq%4 [%1+%2], %3 ; top(bottom)_1
lea %1, [%1+2*%2]
movdq%4 [%1], %3 ; top(bottom)_2
movdq%4 [%1+%2], %3 ; top(bottom)_3
lea %1, [%1+%2]
%endmacro
; Fills four consecutive 32-byte rows starting at [%1] (row pitch %2) with the
; 16-byte register %3 written twice per row, then leaves %1 advanced by 4*%2.
; All stores are movdqa, so every row address must be 16-byte aligned.
%macro mov_line_32x4_sse2 3 ; dst, stride, xmm?
movdqa [%1], %3 ; top(bottom)_0
movdqa [%1+16], %3 ; top(bottom)_0
movdqa [%1+%2], %3 ; top(bottom)_1
movdqa [%1+%2+16], %3 ; top(bottom)_1
lea %1, [%1+2*%2]
movdqa [%1], %3 ; top(bottom)_2
movdqa [%1+16], %3 ; top(bottom)_2
movdqa [%1+%2], %3 ; top(bottom)_3
movdqa [%1+%2+16], %3 ; top(bottom)_3
lea %1, [%1+2*%2]
%endmacro
; Same four 32-byte row fills as mov_line_32x4_sse2, but the final step advances
; %1 by only one pitch, so %1 ends up ON the last row written (start + 3*%2).
; All stores are movdqa, so every row address must be 16-byte aligned.
%macro mov_line_end32x4_sse2 3 ; dst, stride, xmm?
movdqa [%1], %3 ; top(bottom)_0
movdqa [%1+16], %3 ; top(bottom)_0
movdqa [%1+%2], %3 ; top(bottom)_1
movdqa [%1+%2+16], %3 ; top(bottom)_1
lea %1, [%1+2*%2]
movdqa [%1], %3 ; top(bottom)_2
movdqa [%1+16], %3 ; top(bottom)_2
movdqa [%1+%2], %3 ; top(bottom)_3
movdqa [%1+%2+16], %3 ; top(bottom)_3
lea %1, [%1+%2]
%endmacro
%macro exp_top_bottom_sse2 1 ; iPaddingSize [luma(32)/chroma(16)]
; Replicates the first picture row into the top border and the last picture row
; into the bottom border, one 16-byte wide column strip per loop iteration.
;
; Register contract on entry:
;   r2 = picture width in pixels (turned into a strip counter here)
;   r0 = first picture row (top source)     r5 = border row directly above it
;   r3 = last picture row (bottom source)   r4 = outermost bottom border row
;   r1 = -stride
; Clobbers r0, r1, r2, r3, r4, r5, xmm0, xmm1 (chroma also r6, mm0, mm1).
;
; Each strip ends with a mov_line_end* call that parks the destination pointer
; ON the last row it wrote, and r1 is negated afterwards. The following strip is
; therefore filled in the opposite vertical direction, starting from where the
; previous one stopped -- no pointer rewind is needed.
;
; The loops test their counter only after the body, so width must be >= 16.
%if %1 == 32 ; luma: 32 border rows = 7 groups of four + one closing group
sar r2, 04h ; strip count = width / 16
.top_bottom_loops:
; top border strip
movdqa xmm0, [r0] ; 16 pixels of the first picture row
%rep 7
mov_line_16x4_sse2 r5, r1, xmm0, a
%endrep
mov_line_end16x4_sse2 r5, r1, xmm0, a
; bottom border strip
movdqa xmm1, [r3] ; 16 pixels of the last picture row
%rep 7
mov_line_16x4_sse2 r4, r1, xmm1, a
%endrep
mov_line_end16x4_sse2 r4, r1, xmm1, a
; move all four pointers one strip to the right
lea r0, [r0+16] ; top source
lea r5, [r5+16] ; top destination
lea r3, [r3+16] ; bottom source
lea r4, [r4+16] ; bottom destination
neg r1 ; flip fill direction for the next strip
dec r2
jnz near .top_bottom_loops
%elif %1 == 16 ; chroma: 16 border rows = 3 groups of four + one closing group
mov r6, r2 ; keep the raw width for the remainder check below
sar r2, 04h ; strip count = width / 16
.top_bottom_loops:
; top border strip
movdqa xmm0, [r0] ; 16 pixels of the first picture row
%rep 3
mov_line_16x4_sse2 r5, r1, xmm0, a
%endrep
mov_line_end16x4_sse2 r5, r1, xmm0, a
; bottom border strip
movdqa xmm1, [r3] ; 16 pixels of the last picture row
%rep 3
mov_line_16x4_sse2 r4, r1, xmm1, a
%endrep
mov_line_end16x4_sse2 r4, r1, xmm1, a
; move all four pointers one strip to the right
lea r0, [r0+16] ; top source
lea r5, [r5+16] ; top destination
lea r3, [r3+16] ; bottom source
lea r4, [r4+16] ; bottom destination
neg r1 ; flip fill direction for the next strip
dec r2
jnz near .top_bottom_loops
; trailing 8-pixel strip when width is not a multiple of 16
and r6, 0fh ; sets ZF when nothing is left over
jz near .to_be_continued
; top border, 8 bytes wide
movq mm0, [r0]
%rep 3
mov_line_8x4_mmx r5, r1, mm0
%endrep
mov_line_end8x4_mmx r5, r1, mm0
; bottom border, 8 bytes wide
movq mm1, [r3]
%rep 3
mov_line_8x4_mmx r4, r1, mm1
%endrep
mov_line_end8x4_mmx r4, r1, mm1
WELSEMMS
.to_be_continued:
%endif
%endmacro
; Fills the left and right borders row by row: for every picture row the first
; pixel is replicated to the left of the row and the last pixel to the right.
; Luma writes 32 bytes per side, chroma 16 bytes per side.
; %2 (u/a) only affects the chroma right-border store (movdqu vs movdqa).
; The loop tests r6 after the body, so r6 must be >= 1 on entry.
; Clobbers r0, r2d, r3, r4, r5, r6, xmm0, xmm1.
; NOTE(review): SSE2_Copy16Times presumably broadcasts the low byte of its
; second operand to all 16 lanes of the first -- confirm in asm_inc.asm.
%macro exp_left_right_sse2 2 ; iPaddingSize [luma(32)/chroma(16)], u/a
;r6 [height]
;r0 [pSrc+0] r5[pSrc-32] r1[stride]
;r3 [pSrc+(w-1)] r4[pSrc+w]
%if %1 == 32 ; for luma
.left_right_loops:
; left
movzx r2d, byte [r0] ; pixel pData for left border
SSE2_Copy16Times xmm0, r2d ; dst, tmp, pSrc [generic register name: a/b/c/d]
movdqa [r5], xmm0
movdqa [r5+16], xmm0
; right
movzx r2d, byte [r3] ; last pixel of the row, for the right border
SSE2_Copy16Times xmm1, r2d ; dst, tmp, pSrc [generic register name: a/b/c/d]
movdqa [r4], xmm1
movdqa [r4+16], xmm1
; step all four pointers down one row
lea r0, [r0+r1] ; left pSrc
lea r5, [r5+r1] ; left dst
lea r3, [r3+r1] ; right pSrc
lea r4, [r4+r1] ; right dst
dec r6
jnz near .left_right_loops
%elif %1 == 16 ; for chroma
.left_right_loops:
; left
movzx r2d, byte [r0] ; pixel pData for left border
SSE2_Copy16Times xmm0, r2d ; dst, tmp, pSrc [generic register name: a/b/c/d]
movdqa [r5], xmm0
; right
movzx r2d, byte [r3] ; last pixel of the row, for the right border
SSE2_Copy16Times xmm1, r2d ; dst, tmp, pSrc [generic register name: a/b/c/d]
movdq%2 [r4], xmm1 ; might not be aligned 16 bytes in case chroma planes
; step all four pointers down one row
lea r0, [r0+r1] ; left pSrc
lea r5, [r5+r1] ; left dst
lea r3, [r3+r1] ; right pSrc
lea r4, [r4+r1] ; right dst
dec r6
jnz near .left_right_loops
%endif
%endmacro
%macro exp_cross_sse2 2 ; iPaddingSize [luma(32)/chroma(16)], u/a
; Fills the four corner squares of the border with a constant pixel each.
;
; Register contract on entry:
;   xmm3 = top-left fill      r3 = start row of the top-left corner
;   xmm4 = top-right fill     r4 = start row of the top-right corner
;   xmm5 = bottom-left fill   r5 = start row of the bottom-left corner
;   xmm6 = bottom-right fill  r6 = start row of the bottom-right corner
;   r1   = row step applied between rows (callers pass -stride)
; Clobbers r3, r4, r5, r6.
;
; Luma corners are 32x32 and always stored with movdqa. Chroma corners are
; 16x16; the two left corners use movdqa, the two right corners use movdq%2
; because the right edge of a chroma plane may not be 16-byte aligned.
%if %1 == 32 ; luma: 32 rows = 7 groups of four + one closing group
; top-left
%rep 7
mov_line_32x4_sse2 r3, r1, xmm3
%endrep
mov_line_end32x4_sse2 r3, r1, xmm3
; top-right
%rep 7
mov_line_32x4_sse2 r4, r1, xmm4
%endrep
mov_line_end32x4_sse2 r4, r1, xmm4
; bottom-left
%rep 7
mov_line_32x4_sse2 r5, r1, xmm5
%endrep
mov_line_end32x4_sse2 r5, r1, xmm5
; bottom-right
%rep 7
mov_line_32x4_sse2 r6, r1, xmm6
%endrep
mov_line_end32x4_sse2 r6, r1, xmm6
%elif %1 == 16 ; chroma: 16 rows = 3 groups of four + one closing group
; top-left
%rep 3
mov_line_16x4_sse2 r3, r1, xmm3, a
%endrep
mov_line_end16x4_sse2 r3, r1, xmm3, a
; top-right
%rep 3
mov_line_16x4_sse2 r4, r1, xmm4, %2
%endrep
mov_line_end16x4_sse2 r4, r1, xmm4, %2
; bottom-left
%rep 3
mov_line_16x4_sse2 r5, r1, xmm5, a
%endrep
mov_line_end16x4_sse2 r5, r1, xmm5, a
; bottom-right
%rep 3
mov_line_16x4_sse2 r6, r1, xmm6, %2
%endrep
mov_line_end16x4_sse2 r6, r1, xmm6, %2
%endif
%endmacro
;***********************************************************************----------------
; void ExpandPictureLuma_sse2( uint8_t *pDst,
; const int32_t iStride,
; const int32_t iWidth,
; const int32_t iHeight );
;***********************************************************************----------------
; Pads a luma plane with a 32-pixel border on every side by replicating edge
; pixels: top/bottom rows first, then left/right columns, then the four corners.
; After LOAD_4_PARA: r0 = pDst (first picture pixel), r1 = iStride,
; r2 = iWidth, r3 = iHeight (mapping inferred from the prototype above).
; All border stores use movdqa, so pDst, iStride and iWidth must keep every
; touched address 16-byte aligned.
WELS_EXTERN ExpandPictureLuma_sse2
push r4
push r5
push r6
%assign push_num 3
LOAD_4_PARA
PUSH_XMM 7
SIGN_EXTENSION r1, r1d
SIGN_EXTENSION r2, r2d
SIGN_EXTENSION r3, r3d
;also prepare for cross border pData top-left:xmm3
movzx r6d,byte[r0]
SSE2_Copy16Times xmm3,r6d ;xmm3: pSrc[0]
neg r1
lea r5,[r0+r1] ;r5 = pSrc[-stride]: first top-border row to fill
neg r1
push r3 ;save height; popped into r6 before the left/right pass
dec r3 ;h-1
imul r3,r1 ;(h-1)*stride
lea r3,[r0+r3] ;pSrc[(h-1)*stride] r3 = src bottom
mov r6,r1 ;r6 = stride
sal r6,05h ;r6 = 32*stride
lea r4,[r3+r6] ;r4 = outermost bottom-border row (dst bottom)
;also prepare for cross border data: bottom-left with xmm5,bottom-right xmm6
movzx r6d,byte [r3] ;bottom-left
SSE2_Copy16Times xmm5,r6d
lea r6,[r3+r2-1]
movzx r6d,byte [r6]
SSE2_Copy16Times xmm6,r6d ;bottom-right
neg r1 ;r1 = -stride
;the macro below clobbers r0, r1 and r2, so keep copies on the stack
push r0
push r1
push r2
exp_top_bottom_sse2 32
; for both left and right border
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
pop r2
pop r1
pop r0
lea r5,[r0-32] ;left border dst: pSrc-32 for luma
lea r3,[r0+r2-1] ;right border src
lea r4,[r3+1] ;right border dst
;prepare for cross border data: top-right with xmm4
movzx r6d,byte [r3] ;top-right
SSE2_Copy16Times xmm4,r6d
neg r1 ;r1 = stride
pop r6 ; r6 = height
;the macro below advances r0 and counts r6 down, so keep copies on the stack
push r0
push r1
push r2
push r6
exp_left_right_sse2 32,a
pop r6
pop r2
pop r1
pop r0
; for cross border [top-left, top-right, bottom-left, bottom-right]
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; xmm3..xmm6 were initialised above with the four corner pixels; compute the
; four corner start rows and perform the padding below
neg r1 ;r1 = -stride
lea r3,[r0-32]
lea r3,[r3+r1] ;last line of top-left border
lea r4,[r0+r2] ;psrc +width
lea r4,[r4+r1] ;psrc +width -stride
neg r1 ;r1 = stride
add r6,32 ;height + 32 (luma border size)
imul r6,r1 ;r6 = (height+32)*stride
lea r5,[r3+r6] ;last line of bottom-left border
lea r6,[r4+r6] ;last line of bottom-right border
neg r1 ; r1 = -stride
; fill the four corners
exp_cross_sse2 32,a
POP_XMM
LOAD_4_PARA_POP
pop r6
pop r5
pop r4
%assign push_num 0
ret
;***********************************************************************----------------
; void ExpandPictureChromaAlign_sse2( uint8_t *pDst,
; const int32_t iStride,
; const int32_t iWidth,
; const int32_t iHeight );
;***********************************************************************----------------
; Pads a chroma plane with a 16-pixel border on every side by replicating edge
; pixels: top/bottom rows first, then left/right columns, then the four corners.
; After LOAD_4_PARA: r0 = pDst (first picture pixel), r1 = iStride,
; r2 = iWidth, r3 = iHeight (mapping inferred from the prototype above).
; This variant passes "a" to the helper macros, so the right-hand border and
; corners are written with movdqa and pDst+iWidth must be 16-byte aligned.
WELS_EXTERN ExpandPictureChromaAlign_sse2
push r4
push r5
push r6
%assign push_num 3
LOAD_4_PARA
PUSH_XMM 7
SIGN_EXTENSION r1,r1d
SIGN_EXTENSION r2,r2d
SIGN_EXTENSION r3,r3d
;also prepare for cross border pData top-left:xmm3
movzx r6d,byte [r0]
SSE2_Copy16Times xmm3,r6d ;xmm3: pSrc[0]
neg r1
lea r5,[r0+r1] ;r5 = pSrc[-stride]: first top-border row to fill
neg r1
push r3 ;save height; popped into r6 before the left/right pass
dec r3 ;h-1
imul r3,r1 ;(h-1)*stride
lea r3,[r0+r3] ;pSrc[(h-1)*stride] r3 = src bottom
mov r6,r1 ;r6 = stride
sal r6,04h ;r6 = 16*stride (chroma border is 16 rows)
lea r4,[r3+r6] ;r4 = outermost bottom-border row (dst bottom)
;also prepare for cross border data: bottom-left with xmm5,bottom-right xmm6
movzx r6d,byte [r3] ;bottom-left
SSE2_Copy16Times xmm5,r6d
lea r6,[r3+r2-1]
movzx r6d,byte [r6]
SSE2_Copy16Times xmm6,r6d ;bottom-right
neg r1 ;r1 = -stride
;the macro below clobbers r0, r1 and r2, so keep copies on the stack
push r0
push r1
push r2
exp_top_bottom_sse2 16
; for both left and right border
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
pop r2
pop r1
pop r0
lea r5,[r0-16] ;left border dst: pSrc-16 for chroma
lea r3,[r0+r2-1] ;right border src
lea r4,[r3+1] ;right border dst
;prepare for cross border data: top-right with xmm4
movzx r6d,byte [r3] ;top-right
SSE2_Copy16Times xmm4,r6d
neg r1 ;r1 = stride
pop r6 ; r6 = height
;the macro below advances r0 and counts r6 down, so keep copies on the stack
push r0
push r1
push r2
push r6
exp_left_right_sse2 16,a
pop r6
pop r2
pop r1
pop r0
; for cross border [top-left, top-right, bottom-left, bottom-right]
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; xmm3..xmm6 were initialised above with the four corner pixels; compute the
; four corner start rows and perform the padding below
neg r1 ;r1 = -stride
lea r3,[r0-16]
lea r3,[r3+r1] ;last line of top-left border
lea r4,[r0+r2] ;psrc +width
lea r4,[r4+r1] ;psrc +width -stride
neg r1 ;r1 = stride
add r6,16 ;height + 16 (chroma border size)
imul r6,r1 ;r6 = (height+16)*stride
lea r5,[r3+r6] ;last line of bottom-left border
lea r6,[r4+r6] ;last line of bottom-right border
neg r1 ; r1 = -stride
; fill the four corners
exp_cross_sse2 16,a
POP_XMM
LOAD_4_PARA_POP
pop r6
pop r5
pop r4
%assign push_num 0
ret
;***********************************************************************----------------
; void ExpandPictureChromaUnalign_sse2( uint8_t *pDst,
; const int32_t iStride,
; const int32_t iWidth,
; const int32_t iHeight );
;***********************************************************************----------------
; Pads a chroma plane with a 16-pixel border on every side by replicating edge
; pixels: top/bottom rows first, then left/right columns, then the four corners.
; After LOAD_4_PARA: r0 = pDst (first picture pixel), r1 = iStride,
; r2 = iWidth, r3 = iHeight (mapping inferred from the prototype above).
; This variant passes "u" to the helper macros, so the right-hand border and
; corners are written with movdqu and pDst+iWidth need not be 16-byte aligned.
; The top/bottom and left-hand stores still use movdqa.
WELS_EXTERN ExpandPictureChromaUnalign_sse2
push r4
push r5
push r6
%assign push_num 3
LOAD_4_PARA
PUSH_XMM 7
SIGN_EXTENSION r1,r1d
SIGN_EXTENSION r2,r2d
SIGN_EXTENSION r3,r3d
;also prepare for cross border pData top-left:xmm3
movzx r6d,byte [r0]
SSE2_Copy16Times xmm3,r6d ;xmm3: pSrc[0]
neg r1
lea r5,[r0+r1] ;r5 = pSrc[-stride]: first top-border row to fill
neg r1
push r3 ;save height; popped into r6 before the left/right pass
dec r3 ;h-1
imul r3,r1 ;(h-1)*stride
lea r3,[r0+r3] ;pSrc[(h-1)*stride] r3 = src bottom
mov r6,r1 ;r6 = stride
sal r6,04h ;r6 = 16*stride (chroma border is 16 rows)
lea r4,[r3+r6] ;r4 = outermost bottom-border row (dst bottom)
;also prepare for cross border data: bottom-left with xmm5,bottom-right xmm6
movzx r6d,byte [r3] ;bottom-left
SSE2_Copy16Times xmm5,r6d
lea r6,[r3+r2-1]
movzx r6d,byte [r6]
SSE2_Copy16Times xmm6,r6d ;bottom-right
neg r1 ;r1 = -stride
;the macro below clobbers r0, r1 and r2, so keep copies on the stack
push r0
push r1
push r2
exp_top_bottom_sse2 16
; for both left and right border
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
pop r2
pop r1
pop r0
lea r5,[r0-16] ;left border dst: pSrc-16 for chroma
lea r3,[r0+r2-1] ;right border src
lea r4,[r3+1] ;right border dst
;prepare for cross border data: top-right with xmm4
movzx r6d,byte [r3] ;top-right
SSE2_Copy16Times xmm4,r6d
neg r1 ;r1 = stride
pop r6 ; r6 = height
;the macro below advances r0 and counts r6 down, so keep copies on the stack
push r0
push r1
push r2
push r6
exp_left_right_sse2 16,u
pop r6
pop r2
pop r1
pop r0
; for cross border [top-left, top-right, bottom-left, bottom-right]
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; xmm3..xmm6 were initialised above with the four corner pixels; compute the
; four corner start rows and perform the padding below
neg r1 ;r1 = -stride
lea r3,[r0-16]
lea r3,[r3+r1] ;last line of top-left border
lea r4,[r0+r2] ;psrc +width
lea r4,[r4+r1] ;psrc +width -stride
neg r1 ;r1 = stride
add r6,16 ;height + 16 (chroma border size)
imul r6,r1 ;r6 = (height+16)*stride
lea r5,[r3+r6] ;last line of bottom-left border
lea r6,[r4+r6] ;last line of bottom-right border
neg r1 ; r1 = -stride
; fill the four corners
exp_cross_sse2 16,u
POP_XMM
LOAD_4_PARA_POP
pop r6
pop r5
pop r4
%assign push_num 0
ret

View File

@ -0,0 +1,117 @@
;*!
;* \copy
;* Copyright (c) 2009-2013, Cisco Systems
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;*
;* * Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;*
;* * Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in
;* the documentation and/or other materials provided with the
;* distribution.
;*
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;* POSSIBILITY OF SUCH DAMAGE.
;*
;*
;* intra_pred_common.asm
;*
;* Abstract
;* sse2 function for intra predict operations
;*
;* History
;* 18/09/2009 Created
;*
;*
;*************************************************************************/
%include "asm_inc.asm"
;***********************************************************************
; Code
;***********************************************************************
SECTION .text
;***********************************************************************
; void WelsI16x16LumaPredH_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride);
;***********************************************************************
; One prediction row: step the output (r0) by 16 bytes and the reference
; pointer (r1) by one stride (r2), fetch the byte now under r1 and write it
; across the whole 16-byte output row. Uses r3 and xmm0 as temporaries.
%macro SSE2_PRED_H_16X16_ONE_LINE 0
add r0, 16
add r1, r2
movzx r3, byte [r1]
SSE2_Copy16Times xmm0, r3d
movdqa [r0], xmm0
%endmacro
; Horizontal 16x16 prediction: output row i (16 bytes at pred + 16*i, written
; with movdqa, so pred must be 16-byte aligned) is filled with the single byte
; located at pRef - 1 + i*stride.
WELS_EXTERN WelsI16x16LumaPredH_sse2
push r3
%assign push_num 1
LOAD_3_PARA
SIGN_EXTENSION r2, r2d
; row 0 is handled inline because the pointers must not be advanced yet
dec r1 ; r1 -> byte immediately left of pRef
movzx r3, byte [r1]
SSE2_Copy16Times xmm0, r3d
movdqa [r0], xmm0
; rows 1..15
%rep 15
SSE2_PRED_H_16X16_ONE_LINE
%endrep
pop r3
ret
;***********************************************************************
; void WelsI16x16LumaPredV_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride);
;***********************************************************************
; Vertical 16x16 prediction: loads the 16 bytes one stride above pRef and
; copies them into all sixteen 16-byte rows of pred (256 contiguous bytes).
; Both the load and the stores are movdqa, so pRef - stride and pred must be
; 16-byte aligned.
WELS_EXTERN WelsI16x16LumaPredV_sse2
%assign push_num 0
LOAD_3_PARA
SIGN_EXTENSION r2, r2d
sub r1, r2 ; r1 -> row directly above the block
movdqa xmm0, [r1]
movdqa [r0], xmm0 ; row 0
; rows 1..15 at byte offsets 16, 32, ..., 240
%assign pred_v_off 16
%rep 15
movdqa [r0+pred_v_off], xmm0
%assign pred_v_off pred_v_off+16
%endrep
%undef pred_v_off
ret

View File

@ -0,0 +1,615 @@
;*!
;* \copy
;* Copyright (c) 2009-2013, Cisco Systems
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;*
;* * Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;*
;* * Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in
;* the documentation and/or other materials provided with the
;* distribution.
;*
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;* POSSIBILITY OF SUCH DAMAGE.
;*
;*
;* mb_copy.asm
;*
;* Abstract
;* mb_copy and mb_copy1
;*
;* History
;* 15/09/2009 Created
;* 12/28/2009 Modified with larger throughput
;* 12/29/2011 Tuned WelsCopy16x16NotAligned_sse2, added UpdateMbMv_sse2 WelsCopy16x8NotAligned_sse2,
;* WelsCopy16x8_mmx, WelsCopy8x16_mmx etc;
;*
;*
;*********************************************************************************************/
%include "asm_inc.asm"
%ifdef __NASM_VER__
%use smartalign
%endif
;***********************************************************************
; Macros and other preprocessor constants
;***********************************************************************
;***********************************************************************
; Code
;***********************************************************************
SECTION .text
;***********************************************************************
; void WelsCopy16x16_sse2( uint8_t* Dst,
; int32_t iStrideD,
; uint8_t* Src,
; int32_t iStrideS )
;***********************************************************************
; Copies a 16x16 byte block from Src (r2, pitch r3) to Dst (r0, pitch r1).
; Every load and store is movdqa, so all 16 source rows and all 16 destination
; rows must be 16-byte aligned. Rows are moved in two batches of eight through
; xmm0..xmm7.
WELS_EXTERN WelsCopy16x16_sse2
push r4
push r5
%assign push_num 2
LOAD_4_PARA
PUSH_XMM 8
SIGN_EXTENSION r1, r1d
SIGN_EXTENSION r3, r3d
lea r4, [r1+2*r1] ; r4 = 3 * iStrideD
lea r5, [r3+2*r3] ; r5 = 3 * iStrideS
; load source rows 0..7
movdqa xmm0, [r2]
movdqa xmm1, [r2+r3]
movdqa xmm2, [r2+2*r3]
movdqa xmm3, [r2+r5]
lea r2, [r2+4*r3]
movdqa xmm4, [r2]
movdqa xmm5, [r2+r3]
movdqa xmm6, [r2+2*r3]
movdqa xmm7, [r2+r5]
lea r2, [r2+4*r3]
; store destination rows 0..7
movdqa [r0], xmm0
movdqa [r0+r1], xmm1
movdqa [r0+2*r1], xmm2
movdqa [r0+r4], xmm3
lea r0, [r0+4*r1]
movdqa [r0], xmm4
movdqa [r0+r1], xmm5
movdqa [r0+2*r1], xmm6
movdqa [r0+r4], xmm7
lea r0, [r0+4*r1]
; load source rows 8..15
movdqa xmm0, [r2]
movdqa xmm1, [r2+r3]
movdqa xmm2, [r2+2*r3]
movdqa xmm3, [r2+r5]
lea r2, [r2+4*r3]
movdqa xmm4, [r2]
movdqa xmm5, [r2+r3]
movdqa xmm6, [r2+2*r3]
movdqa xmm7, [r2+r5]
; store destination rows 8..15
movdqa [r0], xmm0
movdqa [r0+r1], xmm1
movdqa [r0+2*r1], xmm2
movdqa [r0+r4], xmm3
lea r0, [r0+4*r1]
movdqa [r0], xmm4
movdqa [r0+r1], xmm5
movdqa [r0+2*r1], xmm6
movdqa [r0+r4], xmm7
POP_XMM
LOAD_4_PARA_POP
pop r5
pop r4
ret
;***********************************************************************
; void WelsCopy16x16NotAligned_sse2( uint8_t* Dst,
; int32_t iStrideD,
; uint8_t* Src,
; int32_t iStrideS )
;***********************************************************************
; dst can be align with 16 bytes, but not sure about pSrc, 12/29/2011
; Copies a 16x16 byte block from Src (r2, pitch r3) to Dst (r0, pitch r1).
; Loads are movdqu (Src may be unaligned); stores are movdqa, so every
; destination row must still be 16-byte aligned. Rows are moved in two batches
; of eight through xmm0..xmm7.
WELS_EXTERN WelsCopy16x16NotAligned_sse2
push r4
push r5
%assign push_num 2
LOAD_4_PARA
PUSH_XMM 8
SIGN_EXTENSION r1, r1d
SIGN_EXTENSION r3, r3d
lea r4, [r1+2*r1] ; r4 = 3 * iStrideD
lea r5, [r3+2*r3] ; r5 = 3 * iStrideS
; load source rows 0..7 (unaligned)
movdqu xmm0, [r2]
movdqu xmm1, [r2+r3]
movdqu xmm2, [r2+2*r3]
movdqu xmm3, [r2+r5]
lea r2, [r2+4*r3]
movdqu xmm4, [r2]
movdqu xmm5, [r2+r3]
movdqu xmm6, [r2+2*r3]
movdqu xmm7, [r2+r5]
lea r2, [r2+4*r3]
; store destination rows 0..7 (aligned)
movdqa [r0], xmm0
movdqa [r0+r1], xmm1
movdqa [r0+2*r1], xmm2
movdqa [r0+r4], xmm3
lea r0, [r0+4*r1]
movdqa [r0], xmm4
movdqa [r0+r1], xmm5
movdqa [r0+2*r1], xmm6
movdqa [r0+r4], xmm7
lea r0, [r0+4*r1]
; load source rows 8..15 (unaligned)
movdqu xmm0, [r2]
movdqu xmm1, [r2+r3]
movdqu xmm2, [r2+2*r3]
movdqu xmm3, [r2+r5]
lea r2, [r2+4*r3]
movdqu xmm4, [r2]
movdqu xmm5, [r2+r3]
movdqu xmm6, [r2+2*r3]
movdqu xmm7, [r2+r5]
; store destination rows 8..15 (aligned)
movdqa [r0], xmm0
movdqa [r0+r1], xmm1
movdqa [r0+2*r1], xmm2
movdqa [r0+r4], xmm3
lea r0, [r0+4*r1]
movdqa [r0], xmm4
movdqa [r0+r1], xmm5
movdqa [r0+2*r1], xmm6
movdqa [r0+r4], xmm7
POP_XMM
LOAD_4_PARA_POP
pop r5
pop r4
ret
; , 12/29/2011
;***********************************************************************
; void WelsCopy16x8NotAligned_sse2(uint8_t* Dst,
; int32_t iStrideD,
; uint8_t* Src,
; int32_t iStrideS )
;***********************************************************************
; Copies a block 16 bytes wide and 8 rows high from Src (r2, pitch r3) to
; Dst (r0, pitch r1). Loads are movdqu (Src may be unaligned); stores are
; movdqa, so every destination row must be 16-byte aligned.
WELS_EXTERN WelsCopy16x8NotAligned_sse2
push r4
push r5
%assign push_num 2
LOAD_4_PARA
PUSH_XMM 8
SIGN_EXTENSION r1, r1d
SIGN_EXTENSION r3, r3d
lea r4, [r1+2*r1] ; r4 = 3 * iStrideD
lea r5, [r3+2*r3] ; r5 = 3 * iStrideS
; load all eight source rows (unaligned)
movdqu xmm0, [r2]
movdqu xmm1, [r2+r3]
movdqu xmm2, [r2+2*r3]
movdqu xmm3, [r2+r5]
lea r2, [r2+4*r3]
movdqu xmm4, [r2]
movdqu xmm5, [r2+r3]
movdqu xmm6, [r2+2*r3]
movdqu xmm7, [r2+r5]
; store all eight destination rows (aligned)
movdqa [r0], xmm0
movdqa [r0+r1], xmm1
movdqa [r0+2*r1], xmm2
movdqa [r0+r4], xmm3
lea r0, [r0+4*r1]
movdqa [r0], xmm4
movdqa [r0+r1], xmm5
movdqa [r0+2*r1], xmm6
movdqa [r0+r4], xmm7
POP_XMM
LOAD_4_PARA_POP
pop r5
pop r4
ret
;***********************************************************************
; void WelsCopy8x16_mmx(uint8_t* Dst,
; int32_t iStrideD,
; uint8_t* Src,
; int32_t iStrideS )
;***********************************************************************
; Copies a block 8 bytes wide and 16 rows high from Src (r2, pitch r3) to
; Dst (r0, pitch r1) using 8-byte movq transfers through mm0..mm7, eight rows
; per batch. WELSEMMS is issued before returning because MMX registers were
; used.
WELS_EXTERN WelsCopy8x16_mmx
%assign push_num 0
LOAD_4_PARA
SIGN_EXTENSION r1, r1d
SIGN_EXTENSION r3, r3d
; load source rows 0..7
movq mm0, [r2]
movq mm1, [r2+r3]
lea r2, [r2+2*r3]
movq mm2, [r2]
movq mm3, [r2+r3]
lea r2, [r2+2*r3]
movq mm4, [r2]
movq mm5, [r2+r3]
lea r2, [r2+2*r3]
movq mm6, [r2]
movq mm7, [r2+r3]
lea r2, [r2+2*r3]
; store destination rows 0..7
movq [r0], mm0
movq [r0+r1], mm1
lea r0, [r0+2*r1]
movq [r0], mm2
movq [r0+r1], mm3
lea r0, [r0+2*r1]
movq [r0], mm4
movq [r0+r1], mm5
lea r0, [r0+2*r1]
movq [r0], mm6
movq [r0+r1], mm7
lea r0, [r0+2*r1]
; load source rows 8..15
movq mm0, [r2]
movq mm1, [r2+r3]
lea r2, [r2+2*r3]
movq mm2, [r2]
movq mm3, [r2+r3]
lea r2, [r2+2*r3]
movq mm4, [r2]
movq mm5, [r2+r3]
lea r2, [r2+2*r3]
movq mm6, [r2]
movq mm7, [r2+r3]
; store destination rows 8..15
movq [r0], mm0
movq [r0+r1], mm1
lea r0, [r0+2*r1]
movq [r0], mm2
movq [r0+r1], mm3
lea r0, [r0+2*r1]
movq [r0], mm4
movq [r0+r1], mm5
lea r0, [r0+2*r1]
movq [r0], mm6
movq [r0+r1], mm7
WELSEMMS
LOAD_4_PARA_POP
ret
;***********************************************************************
; void WelsCopy8x8_mmx( uint8_t* Dst,
; int32_t iStrideD,
; uint8_t* Src,
; int32_t iStrideS )
;***********************************************************************
; Copies an 8x8 byte block from Src (r2, pitch r3) to Dst (r0, pitch r1) using
; 8-byte movq transfers through mm0..mm7. While loading each pair of rows it
; issues prefetchnta hints for the two rows that follow that pair.
; WELSEMMS is issued before returning because MMX registers were used.
WELS_EXTERN WelsCopy8x8_mmx
push r4
%assign push_num 1
LOAD_4_PARA
SIGN_EXTENSION r1, r1d
SIGN_EXTENSION r3, r3d
lea r4, [r3+2*r3] ; r4 = 3 * iStrideS
; prefetch source rows 2 and 3
prefetchnta [r2+2*r3]
prefetchnta [r2+r4]
movq mm0, [r2]
movq mm1, [r2+r3]
lea r2, [r2+2*r3]
; prefetch source rows 4 and 5
prefetchnta [r2+2*r3]
prefetchnta [r2+r4]
movq mm2, [r2]
movq mm3, [r2+r3]
lea r2, [r2+2*r3]
; prefetch source rows 6 and 7
prefetchnta [r2+2*r3]
prefetchnta [r2+r4]
movq mm4, [r2]
movq mm5, [r2+r3]
lea r2, [r2+2*r3]
movq mm6, [r2]
movq mm7, [r2+r3]
; store destination rows 0..7
movq [r0], mm0
movq [r0+r1], mm1
lea r0, [r0+2*r1]
movq [r0], mm2
movq [r0+r1], mm3
lea r0, [r0+2*r1]
movq [r0], mm4
movq [r0+r1], mm5
lea r0, [r0+2*r1]
movq [r0], mm6
movq [r0+r1], mm7
WELSEMMS
LOAD_4_PARA_POP
pop r4
ret
; (dunhuang@cisco), 12/21/2011
;***********************************************************************
; void UpdateMbMv_sse2( SMVUnitXY *pMvBuffer, const SMVUnitXY sMv )
;***********************************************************************
; Replicates the 32-bit value in r1d into the 64 bytes at [r0], i.e. sixteen
; consecutive dwords. Stores are movdqa, so r0 must be 16-byte aligned.
; NOTE(review): sMv is taken as one 32-bit register, so SMVUnitXY is presumably
; a 4-byte struct passed by value -- confirm against its C definition.
WELS_EXTERN UpdateMbMv_sse2
%assign push_num 0
LOAD_2_PARA
movd xmm0, r1d ; _mv
pshufd xmm1, xmm0, $00 ; broadcast dword 0 to all four lanes
movdqa [r0 ], xmm1
movdqa [r0+0x10], xmm1
movdqa [r0+0x20], xmm1
movdqa [r0+0x30], xmm1
ret
;*******************************************************************************
; Macros and other preprocessor constants
;*******************************************************************************
;*******************************************************************************
; Code
;*******************************************************************************
SECTION .text
;*******************************************************************************
; void PixelAvgWidthEq4_mmx( uint8_t *pDst, int iDstStride,
; uint8_t *pSrcA, int iSrcAStride,
; uint8_t *pSrcB, int iSrcBStride,
; int iHeight );
;*******************************************************************************
; Writes, for each of iHeight rows, the rounded byte-wise average of 4 pixels
; from pSrcA (r2, pitch r3) and 4 pixels from pSrcB (r4, pitch r5) to
; pDst (r0, pitch r1).
; The loop tests the counter after the body, so iHeight (r6) must be >= 1.
; WELSEMMS is issued before returning because MMX registers were used.
WELS_EXTERN PixelAvgWidthEq4_mmx
%assign push_num 0
LOAD_7_PARA
SIGN_EXTENSION r1, r1d
SIGN_EXTENSION r3, r3d
SIGN_EXTENSION r5, r5d
SIGN_EXTENSION r6, r6d
ALIGN 4
.height_loop:
movd mm0, [r4] ; 4 pixels of pSrcB
; Load pSrcA with movd as well: pavgb with a memory operand reads a full
; 8 bytes, which is 4 bytes beyond the 4-pixel row that is actually needed.
movd mm1, [r2] ; 4 pixels of pSrcA
pavgb mm0, mm1
movd [r0], mm0
dec r6 ; sets ZF for the jne below; the lea instructions leave flags untouched
lea r0, [r0+r1]
lea r2, [r2+r3]
lea r4, [r4+r5]
jne .height_loop
WELSEMMS
LOAD_7_PARA_POP
ret
;*******************************************************************************
; void PixelAvgWidthEq8_mmx( uint8_t *pDst, int iDstStride,
; uint8_t *pSrcA, int iSrcAStride,
; uint8_t *pSrcB, int iSrcBStride,
; int iHeight );
;*******************************************************************************
; Writes the rounded byte-wise average of 8 pixels from pSrcA (r2, pitch r3)
; and 8 pixels from pSrcB (r4, pitch r5) to pDst (r0, pitch r1), two rows per
; loop iteration.
; The counter is decremented by 2 and tested for zero after the body, so
; iHeight (r6) must be a positive multiple of 2.
; WELSEMMS is issued before returning because MMX registers were used.
WELS_EXTERN PixelAvgWidthEq8_mmx
%assign push_num 0
LOAD_7_PARA
SIGN_EXTENSION r1, r1d
SIGN_EXTENSION r3, r3d
SIGN_EXTENSION r5, r5d
SIGN_EXTENSION r6, r6d
ALIGN 4
.height_loop:
; first row of the pair
movq mm0, [r2]
pavgb mm0, [r4]
movq [r0], mm0
; second row of the pair
movq mm0, [r2+r3]
pavgb mm0, [r4+r5]
movq [r0+r1], mm0
lea r2, [r2+2*r3]
lea r4, [r4+2*r5]
lea r0, [r0+2*r1]
sub r6, 2
jnz .height_loop
WELSEMMS
LOAD_7_PARA_POP
ret
;*******************************************************************************
; void PixelAvgWidthEq16_sse2( uint8_t *pDst, int iDstStride,
; uint8_t *pSrcA, int iSrcAStride,
; uint8_t *pSrcB, int iSrcBStride,
; int iHeight );
;*******************************************************************************
; Writes the rounded byte-wise average of 16 pixels from pSrcA (r2, pitch r3)
; and 16 pixels from pSrcB (r4, pitch r5) to pDst (r0, pitch r1), four rows
; per loop iteration. All accesses are movdqu, so no pointer needs alignment.
; The counter is decremented by 4 and tested for zero after the body, so
; iHeight (r6) must be a positive multiple of 4.
; NOTE(review): WELSEMMS is issued although only xmm registers are used in this
; routine; it looks unnecessary -- confirm no caller relies on it before
; removing.
WELS_EXTERN PixelAvgWidthEq16_sse2
%assign push_num 0
LOAD_7_PARA
SIGN_EXTENSION r1, r1d
SIGN_EXTENSION r3, r3d
SIGN_EXTENSION r5, r5d
SIGN_EXTENSION r6, r6d
ALIGN 4
.height_loop:
; row 0
movdqu xmm0, [r2]
movdqu xmm1, [r4]
pavgb xmm0, xmm1
; (a memory operand on pavgb would require 16-byte alignment, hence the
; separate unaligned load above instead of the form kept below)
;pavgb xmm0, [r4]
movdqu [r0], xmm0
; row 1
movdqu xmm0, [r2+r3]
movdqu xmm1, [r4+r5]
pavgb xmm0, xmm1
movdqu [r0+r1], xmm0
; row 2
movdqu xmm0, [r2+2*r3]
movdqu xmm1, [r4+2*r5]
pavgb xmm0, xmm1
movdqu [r0+2*r1], xmm0
; advance two rows, then row 3 is at +1 pitch
lea r2, [r2+2*r3]
lea r4, [r4+2*r5]
lea r0, [r0+2*r1]
movdqu xmm0, [r2+r3]
movdqu xmm1, [r4+r5]
pavgb xmm0, xmm1
movdqu [r0+r1], xmm0
; advance the remaining two rows
lea r2, [r2+2*r3]
lea r4, [r4+2*r5]
lea r0, [r0+2*r1]
sub r6, 4
jne .height_loop
WELSEMMS
LOAD_7_PARA_POP
ret
; Generic strided row copy, four rows per loop iteration.
;   %1  = load instruction       %2  = store instruction
;   %3  = destination pointer    %4  = destination stride
;   %5  = source pointer         %6  = source stride
;   %7  = row counter
;   %8  = scratch register, set to 3 * source stride
;   %9  = scratch register, set to 3 * destination stride
;   %10, %11 = data registers used to carry one row each
; %3, %5 and %7 are modified. The counter is decremented by 4 and the loop
; repeats while it is still greater than zero; the body runs at least once.
%macro CopyStrided4N 11
lea %8, [3 * %6]
lea %9, [3 * %4]
ALIGN 32
%%loop:
; rows 0 and 1
%1 %10, [%5]
%1 %11, [%5 + %6]
%2 [%3], %10
%2 [%3 + %4], %11
; rows 2 and 3
%1 %10, [%5 + 2 * %6]
%1 %11, [%5 + %8]
%2 [%3 + 2 * %4], %10
%2 [%3 + %9], %11
lea %5, [%5 + 4 * %6]
lea %3, [%3 + 4 * %4]
sub %7, 4
jg %%loop
%endmacro
;*******************************************************************************
; void McCopyWidthEq8_mmx( uint8_t *pSrc, int iSrcStride,
; uint8_t *pDst, int iDstStride, int iHeight )
;*******************************************************************************
; Copies iHeight rows of 8 bytes from pSrc (r0, pitch r1) to pDst (r2, pitch
; r3) via CopyStrided4N with movq for both load and store, four rows per
; iteration. WELSEMMS is issued afterwards because mm0/mm1 were used.
WELS_EXTERN McCopyWidthEq8_mmx
%assign push_num 0
%ifdef X86_32
; r5 and r6 serve as scratch registers for CopyStrided4N; the 32-bit build
; saves and restores them around the copy
push r5
push r6
%assign push_num 2
%endif
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
SIGN_EXTENSION r3, r3d
SIGN_EXTENSION r4, r4d
CopyStrided4N movq, movq, r2, r3, r0, r1, r4, r5, r6, mm0, mm1
WELSEMMS
LOAD_5_PARA_POP
%ifdef X86_32
pop r6
pop r5
%endif
ret
;*******************************************************************************
; void McCopyWidthEq16_sse2( uint8_t *pSrc, int iSrcStride, uint8_t *pDst, int iDstStride, int iHeight )
;*******************************************************************************
;read unaligned memory
; Loads 16 bytes from [%2] into xmm register %1 as two 8-byte halves
; (movq for the low half, movhps for the high half).
%macro SSE_READ_UNA 2
movq %1, [%2]
movhps %1, [%2+8]
%endmacro
;write unaligned memory
; Stores the 16 bytes of xmm register %2 to [%1] as two 8-byte halves
; (movq for the low half, movhps for the high half).
%macro SSE_WRITE_UNA 2
movq [%1], %2
movhps [%1+8], %2
%endmacro
; Copies iHeight rows of 16 bytes from pSrc (r0, pitch r1) to pDst (r2, pitch
; r3), two rows per loop iteration, using the split 8+8 byte transfers of
; SSE_READ_UNA / SSE_WRITE_UNA.
; The counter is decremented by 2 and tested for zero after the body, so
; iHeight (r4) must be a positive multiple of 2.
WELS_EXTERN McCopyWidthEq16_sse2
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
SIGN_EXTENSION r3, r3d
SIGN_EXTENSION r4, r4d
ALIGN 4
.height_loop:
SSE_READ_UNA xmm0, r0
SSE_READ_UNA xmm1, r0+r1
SSE_WRITE_UNA r2, xmm0
SSE_WRITE_UNA r2+r3, xmm1
sub r4, 2 ; sets ZF for the jnz below; the lea instructions leave flags untouched
lea r0, [r0+r1*2]
lea r2, [r2+r3*2]
jnz .height_loop
LOAD_5_PARA_POP
ret
;*******************************************************************************
; void McCopyWidthEq16_sse3( uint8_t *pSrc, int iSrcStride, uint8_t *pDst, int iDstStride, int iHeight )
;*******************************************************************************
; Copies iHeight rows of 16 bytes from pSrc (r0, pitch r1) to pDst (r2, pitch
; r3) via CopyStrided4N, four rows per iteration, loading with lddqu and
; storing with MOVDQ.
; NOTE(review): MOVDQ is defined outside this file; whether it expands to an
; aligned or an unaligned store decides the alignment requirement on pDst --
; confirm in asm_inc.asm.
WELS_EXTERN McCopyWidthEq16_sse3
%assign push_num 0
%ifdef X86_32
; r5 and r6 serve as scratch registers for CopyStrided4N; the 32-bit build
; saves and restores them around the copy
push r5
push r6
%assign push_num 2
%endif
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
SIGN_EXTENSION r3, r3d
SIGN_EXTENSION r4, r4d
CopyStrided4N lddqu, MOVDQ, r2, r3, r0, r1, r4, r5, r6, xmm0, xmm1
LOAD_5_PARA_POP
%ifdef X86_32
pop r6
pop r5
%endif
ret

View File

@ -0,0 +1,313 @@
;*!
;* \copy
;* Copyright (c) 2004-2013, Cisco Systems
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;*
;* * Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;*
;* * Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in
;* the documentation and/or other materials provided with the
;* distribution.
;*
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;* POSSIBILITY OF SUCH DAMAGE.
;*
;*
;* mc_chroma.asm
;*
;* Abstract
;* mmx motion compensation for chroma
;*
;* History
;* 10/13/2004 Created
;*
;*
;*************************************************************************/
%include "asm_inc.asm"
;***********************************************************************
; Local Data (Read Only)
;***********************************************************************
SECTION .rodata align=16
;***********************************************************************
; Various memory constants (trigonometric values or rounding values)
;***********************************************************************
ALIGN 16
; Rounding term for the XMM chroma filters: eight 16-bit words of 32 (0x20),
; added to the weighted sum before it is shifted right by 6.
h264_d0x20_sse2:
dw 32,32,32,32,32,32,32,32
ALIGN 16
; Same rounding term for the MMX chroma filter: four 16-bit words of 32.
h264_d0x20_mmx:
dw 32,32,32,32
;=============================================================================
; Code
;=============================================================================
SECTION .text
;*******************************************************************************
; void McChromaWidthEq4_mmx( const uint8_t *src,
; int32_t iSrcStride,
; uint8_t *pDst,
; int32_t iDstStride,
; const uint8_t *pABCD,
; int32_t iHeight );
;
; 4-pixel-wide bilinear chroma interpolation. For every output row:
;   pDst[x] = (A*src[x] + B*src[x+1] + C*src[x+stride] + D*src[x+stride+1] + 32) >> 6
; where A, B, C, D are the four byte weights read from pABCD.
; Register use (argument order per the prototype; LOAD_6_PARA is defined in
; asm_inc.asm): r0 = src, r1 = iSrcStride, r2 = pDst, r3 = iDstStride,
; r4 = pABCD (later reused as the "next row" pointer), r5 = iHeight.
;*******************************************************************************
WELS_EXTERN McChromaWidthEq4_mmx
%assign push_num 0
LOAD_6_PARA
SIGN_EXTENSION r1, r1d
SIGN_EXTENSION r3, r3d
SIGN_EXTENSION r5, r5d
; Broadcast each weight byte to four 16-bit words:
; mm3 = A x4, mm5 = B x4, mm4 = C x4, mm6 = D x4 (mm7 is the zero register).
movd mm3, [r4]; [eax]
WELS_Zero mm7
punpcklbw mm3, mm3
movq mm4, mm3
punpcklwd mm3, mm3
punpckhwd mm4, mm4
movq mm5, mm3
punpcklbw mm3, mm7
punpckhbw mm5, mm7
movq mm6, mm4
punpcklbw mm4, mm7
punpckhbw mm6, mm7
; r4 now points at the row below the current one.
lea r4, [r0 + r1] ;lea ebx, [esi + eax]
; Prime the loop: mm0 = src[0..3], mm1 = src[1..4], both widened to words.
movd mm0, [r0]
movd mm1, [r0+1]
punpcklbw mm0, mm7
punpcklbw mm1, mm7
.xloop:
; Upper row contribution: A*src[x] + B*src[x+1].
pmullw mm0, mm3
pmullw mm1, mm5
paddw mm0, mm1
; Lower row, column x: keep an unscaled copy in mm2 - it becomes the upper row
; of the next iteration.
movd mm1, [r4]
punpcklbw mm1, mm7
movq mm2, mm1
pmullw mm1, mm4
paddw mm0, mm1
; Lower row, column x+1: mm7 temporarily holds the unscaled copy, which is
; moved back into mm1 for the next iteration after the multiply.
movd mm1, [r4+1]
punpcklbw mm1, mm7
movq mm7, mm1
pmullw mm1,mm6
paddw mm0, mm1
movq mm1,mm7
%ifdef X86_32_PICASM
; Build the rounding constant 32 in every word without a memory reference:
; all-ones >> 15 = 1, then << 5 = 32.
pcmpeqw mm7, mm7
psrlw mm7, 15
psllw mm7, 5
paddw mm0, mm7
%else
paddw mm0, [h264_d0x20_mmx]
%endif
psrlw mm0, 6
; mm7 was used as scratch above; restore it to zero before packing.
WELS_Zero mm7
packuswb mm0, mm7
movd [r2], mm0
; Slide down one row: the saved lower row becomes the new upper row.
movq mm0, mm2
lea r2, [r2 + r3]
lea r4, [r4 + r1]
dec r5
jnz near .xloop
WELSEMMS
LOAD_6_PARA_POP
ret
;*******************************************************************************
; void McChromaWidthEq8_sse2( const uint8_t *pSrc,
; int32_t iSrcStride,
; uint8_t *pDst,
; int32_t iDstStride,
; const uint8_t *pABCD,
; int32_t iHeight );
;
; 8-pixel-wide bilinear chroma interpolation. For every output row:
;   pDst[x] = (A*pSrc[x] + B*pSrc[x+1] + C*pSrc[x+stride] + D*pSrc[x+stride+1] + 32) >> 6
; where A, B, C, D are the four byte weights read from pABCD.
; Register use (argument order per the prototype; LOAD_6_PARA is defined in
; asm_inc.asm): r0 = pSrc, r1 = iSrcStride, r2 = pDst, r3 = iDstStride,
; r4 = pABCD (later reused as the "next row" pointer), r5 = iHeight.
;*******************************************************************************
WELS_EXTERN McChromaWidthEq8_sse2
%assign push_num 0
LOAD_6_PARA
PUSH_XMM 8
SIGN_EXTENSION r1, r1d
SIGN_EXTENSION r3, r3d
SIGN_EXTENSION r5, r5d
; Broadcast each weight byte to eight 16-bit words:
; xmm3 = A x8, xmm5 = B x8, xmm4 = C x8, xmm6 = D x8 (xmm7 is the zero register).
movd xmm3, [r4]
WELS_Zero xmm7
punpcklbw xmm3, xmm3
punpcklwd xmm3, xmm3
movdqa xmm4, xmm3
punpckldq xmm3, xmm3
punpckhdq xmm4, xmm4
movdqa xmm5, xmm3
movdqa xmm6, xmm4
punpcklbw xmm3, xmm7
punpckhbw xmm5, xmm7
punpcklbw xmm4, xmm7
punpckhbw xmm6, xmm7
; r4 now points at the row below the current one.
lea r4, [r0 + r1] ;lea ebx, [esi + eax]
; Prime the loop: xmm0 = pSrc[0..7], xmm1 = pSrc[1..8], both widened to words.
movq xmm0, [r0]
movq xmm1, [r0+1]
punpcklbw xmm0, xmm7
punpcklbw xmm1, xmm7
.xloop:
; Upper row contribution: A*pSrc[x] + B*pSrc[x+1].
pmullw xmm0, xmm3
pmullw xmm1, xmm5
paddw xmm0, xmm1
; Lower row, column x: keep an unscaled copy in xmm2 - it becomes the upper row
; of the next iteration.
movq xmm1, [r4]
punpcklbw xmm1, xmm7
movdqa xmm2, xmm1
pmullw xmm1, xmm4
paddw xmm0, xmm1
; Lower row, column x+1: xmm7 temporarily holds the unscaled copy, which is
; moved back into xmm1 for the next iteration after the multiply.
movq xmm1, [r4+1]
punpcklbw xmm1, xmm7
movdqa xmm7, xmm1
pmullw xmm1, xmm6
paddw xmm0, xmm1
movdqa xmm1,xmm7
%ifdef X86_32_PICASM
; Build the rounding constant 32 in every word without a memory reference:
; all-ones >> 15 = 1, then << 5 = 32.
pcmpeqw xmm7, xmm7
psrlw xmm7, 15
psllw xmm7, 5
paddw xmm0, xmm7
%else
paddw xmm0, [h264_d0x20_sse2]
%endif
psrlw xmm0, 6
; xmm7 was used as scratch above; restore it to zero before packing.
WELS_Zero xmm7
packuswb xmm0, xmm7
movq [r2], xmm0
; Slide down one row: the saved lower row becomes the new upper row.
movdqa xmm0, xmm2
lea r2, [r2 + r3]
lea r4, [r4 + r1]
dec r5
jnz near .xloop
POP_XMM
LOAD_6_PARA_POP
ret
;***********************************************************************
; void McChromaWidthEq8_ssse3( const uint8_t *pSrc,
; int32_t iSrcStride,
; uint8_t *pDst,
; int32_t iDstStride,
; const uint8_t *pABCD,
; int32_t iHeight);
;
; 8-pixel-wide bilinear chroma interpolation using pmaddubsw. For every row:
;   pDst[x] = (A*pSrc[x] + B*pSrc[x+1] + C*pSrc[x+stride] + D*pSrc[x+stride+1] + 32) >> 6
; Two output rows are produced per loop iteration and the loop exits only when
; the counter reaches exactly zero, so iHeight is expected to be a positive
; even number.
; Register use (argument order per the prototype; LOAD_6_PARA is defined in
; asm_inc.asm): r0 = pSrc, r1 = iSrcStride, r2 = pDst, r3 = iDstStride,
; r4 = pABCD, r5 = iHeight.
;***********************************************************************
WELS_EXTERN McChromaWidthEq8_ssse3
%assign push_num 0
LOAD_6_PARA
PUSH_XMM 8
SIGN_EXTENSION r1, r1d
SIGN_EXTENSION r3, r3d
SIGN_EXTENSION r5, r5d
; xmm7 is cleared here but is reloaded with the rounding constant below before
; anything reads it.
pxor xmm7, xmm7
; Build the byte-pair weight vectors: xmm5 = (A,B) x8, xmm6 = (C,D) x8.
movd xmm5, [r4]
punpcklwd xmm5, xmm5
punpckldq xmm5, xmm5
movdqa xmm6, xmm5
punpcklqdq xmm5, xmm5
punpckhqdq xmm6, xmm6
; Pre-decrement the destination by two rows because the loop starts by
; advancing it by two rows.
sub r2, r3 ;sub esi, edi
sub r2, r3
%ifdef X86_32_PICASM
; Build the rounding constant 32 in every word without a memory reference.
pcmpeqw xmm7, xmm7
psrlw xmm7, 15
psllw xmm7, 5
%else
movdqa xmm7, [h264_d0x20_sse2]
%endif
; Prime the loop with row 0 interleaved as (s[x], s[x+1]) byte pairs.
; movdqu reads 16 bytes; only the low 9 contribute to the result.
movdqu xmm0, [r0]
movdqa xmm1, xmm0
psrldq xmm1, 1
punpcklbw xmm0, xmm1
.hloop_chroma:
lea r2, [r2+2*r3]
; Row n+1 as interleaved pairs; xmm4 keeps a copy for the second output row.
movdqu xmm2, [r0+r1]
movdqa xmm3, xmm2
psrldq xmm3, 1
punpcklbw xmm2, xmm3
movdqa xmm4, xmm2
; Output row n: (A,B) applied to row n, (C,D) applied to row n+1.
pmaddubsw xmm0, xmm5
pmaddubsw xmm2, xmm6
paddw xmm0, xmm2
paddw xmm0, xmm7
psrlw xmm0, 6
packuswb xmm0, xmm0
movq [r2],xmm0
; Row n+2 as interleaved pairs; xmm0 keeps a copy as the next iteration's row n.
lea r0, [r0+2*r1]
movdqu xmm2, [r0]
movdqa xmm3, xmm2
psrldq xmm3, 1
punpcklbw xmm2, xmm3
movdqa xmm0, xmm2
; Output row n+1: (A,B) applied to row n+1, (C,D) applied to row n+2.
pmaddubsw xmm4, xmm5
pmaddubsw xmm2, xmm6
paddw xmm4, xmm2
paddw xmm4, xmm7
psrlw xmm4, 6
packuswb xmm4, xmm4
movq [r2+r3],xmm4
sub r5, 2
jnz .hloop_chroma
POP_XMM
LOAD_6_PARA_POP
ret

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,411 @@
;*!
;* \copy
;* Copyright (c) 2010-2013, Cisco Systems
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;*
;* * Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;*
;* * Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in
;* the documentation and/or other materials provided with the
;* distribution.
;*
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;* POSSIBILITY OF SUCH DAMAGE.
;*
;*
;* vaa.asm
;*
;* Abstract
;* sse2 for pVaa routines
;*
;* History
;* 04/14/2010 Created
;* 06/07/2010 Added AnalysisVaaInfoIntra_sse2(ssse3)
;* 06/10/2010 Tune rc_sad_frame_sse2 and got about 40% improvement
;* 08/11/2010 Added abs_difference_mbrow_sse2 & sum_sqrsum_mbrow_sse2
;*
;*************************************************************************/
%include "asm_inc.asm"
;***********************************************************************
; Macros and other preprocessor constants
;***********************************************************************
; SUM_WORD_8x2_SSE2 dst, tmp
; Horizontally adds the eight 16-bit words of dst; afterwards the lowest word
; of dst holds the (wrapping 16-bit) total. tmp is clobbered.
; Per the original author's comparison, this outperforms the equivalent
; phaddw (SSSE3) sequence.
%macro SUM_WORD_8x2_SSE2 2 ; dst(pSrc), tmp
; @sum_8x2 begin
; Fold the high 64 bits onto the low 64 bits.
pshufd %2, %1, 04Eh ; 01001110 B
paddw %1, %2
; Within the low 64 bits, fold words 2-3 onto words 0-1.
pshuflw %2, %1, 04Eh ; 01001110 B
paddw %1, %2
; Within the low 64 bits, fold adjacent words together.
pshuflw %2, %1, 0B1h ; 10110001 B
paddw %1, %2
; end of @sum_8x2
%endmacro ; END of SUM_WORD_8x2_SSE2
; VAA_AVG_BLOCK_SSE2 dst, t0, t1, t2, t3, t4
; Reads four 16-byte lines at [r0], [r0+r1], [r0+r2], [r0+r3] with aligned
; loads and computes, for each of the four 4x4 blocks in that 16x4 strip,
; (sum of its 16 pixels) >> 4. On exit the low four words of dst hold the
; averages of blocks 0..3 (the high four words repeat them).
; Requires xmm7 == 0 on entry; t0..t4 are clobbered.
%macro VAA_AVG_BLOCK_SSE2 6 ; dst, t0, t1, t2, t3, t4
movdqa %1, [r0 ] ; line 0
movdqa %2, [r0+r1] ; line 1
movdqa %3, %1
punpcklbw %1, xmm7
punpckhbw %3, xmm7
movdqa %4, %2
punpcklbw %4, xmm7
punpckhbw %2, xmm7
paddw %1, %4
paddw %2, %3
movdqa %3, [r0+r2] ; line 2
movdqa %4, [r0+r3] ; line 3
movdqa %5, %3
punpcklbw %3, xmm7
punpckhbw %5, xmm7
movdqa %6, %4
punpcklbw %6, xmm7
punpckhbw %4, xmm7
paddw %3, %6
paddw %4, %5
; %1 / %2 now hold the per-column sums over the four lines (columns 0-7 / 8-15).
paddw %1, %3 ; block 0, 1
paddw %2, %4 ; block 2, 3
; Horizontal reduction: add neighbouring dwords, then neighbouring words, so
; that each 64-bit half ends up carrying one block's total.
pshufd %3, %1, 0B1h
pshufd %4, %2, 0B1h
paddw %1, %3
paddw %2, %4
movdqa %3, %1
movdqa %4, %2
pshuflw %5, %1, 0B1h
pshufhw %6, %3, 0B1h
paddw %1, %5
paddw %3, %6
pshuflw %5, %2, 0B1h
pshufhw %6, %4, 0B1h
paddw %2, %5
paddw %4, %6
; Interleave so the low words come out in block order 0, 1, 2, 3.
punpcklwd %1, %2
punpckhwd %3, %4
punpcklwd %1, %3
; Divide each 16-pixel sum by 16.
psraw %1, $04
%endmacro
; VAA_AVG_BLOCK_SSSE3 dst, t0, t1, t2, t3, t4
; SSSE3 variant of the 16x4 strip averaging: reads four 16-byte lines at
; [r0], [r0+r1], [r0+r2], [r0+r3] with aligned loads and leaves
; (sum of 16 pixels) >> 4 for 4x4 blocks 0..3 in the low four words of dst
; (the high four words are zero). The horizontal reduction uses phaddw.
; Requires xmm7 == 0 on entry. t0..t3 are clobbered; t4 (%6) is also written.
%macro VAA_AVG_BLOCK_SSSE3 6 ; dst, t0, t1, t2, t3, t4
movdqa %1, [r0 ] ; line 0
movdqa %2, [r0+r1] ; line 1
movdqa %3, %1
punpcklbw %1, xmm7
punpckhbw %3, xmm7
movdqa %4, %2
punpcklbw %4, xmm7
punpckhbw %2, xmm7
paddw %1, %4
paddw %2, %3
movdqa %3, [r0+r2] ; line 2
movdqa %4, [r0+r3] ; line 3
movdqa %5, %3
punpcklbw %3, xmm7
punpckhbw %5, xmm7
movdqa %6, %4
punpcklbw %6, xmm7
punpckhbw %4, xmm7
paddw %3, %6
paddw %4, %5
; %1 / %2 now hold the per-column sums over the four lines (columns 0-7 / 8-15).
paddw %1, %3 ; block 0, 1
paddw %2, %4 ; block 2, 3
phaddw %1, %2 ; block[0]: 0-15, 16-31; block[1]: 32-47, 48-63; ..
phaddw %1, xmm7 ; block[0]: 0-15; block[1]: 16-31; block[2]: 32-47; block[3]: 48-63; ....
; Divide each 16-pixel sum by 16.
psraw %1, $04
%endmacro
;***********************************************************************
; Code
;***********************************************************************
SECTION .text
; , 6/7/2010
;***********************************************************************
; int32_t AnalysisVaaInfoIntra_sse2( uint8_t *pDataY, const int32_t iLineSize );
;
; Splits the 16x16 block at pDataY into sixteen 4x4 sub-blocks, takes the
; average of each (sum >> 4), and returns
;   sum(avg[i]^2) - ((sum(avg[i]))^2 >> 4)
; i.e. 16 times the variance of the sixteen averages.
; The lines are read with aligned 16-byte loads (movdqa), so pDataY and
; iLineSize must keep every line 16-byte aligned.
; Register use (LOAD_2_PARA is defined in asm_inc.asm): r0 = pDataY,
; r1 = iLineSize; r2/r3/r4 = 2x/3x/4x iLineSize; r5 = stack alignment slack.
;***********************************************************************
WELS_EXTERN AnalysisVaaInfoIntra_sse2
%assign push_num 0
LOAD_2_PARA
PUSH_XMM 8
SIGN_EXTENSION r1,r1d
%ifdef X86_32
push r3
push r4
push r5
push r6
%assign push_num push_num+4
%endif
; Carve out a 16-byte-aligned, 32-byte scratch area at r7 (assumed to be the
; stack pointer in asm_inc.asm's register naming - TODO confirm); r5 keeps
; the alignment slack so the adjustment can be undone before returning.
mov r5,r7
and r5,0fh
sub r7,r5
sub r7,32
mov r2,r1
sal r2,$01 ;r2 = 2*iLineSize
mov r3,r2
add r3,r1 ;r3 = 3*iLineSize
mov r4,r2
sal r4,$01 ;r4 = 4*iLineSize
pxor xmm7, xmm7
; loops
; Four 16x4 strips, each giving four 4x4 averages (8 bytes) in the scratch area.
VAA_AVG_BLOCK_SSE2 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5
movq [r7], xmm0
lea r0, [r0+r4]
VAA_AVG_BLOCK_SSE2 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5
movq [r7+8], xmm0
lea r0, [r0+r4]
VAA_AVG_BLOCK_SSE2 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5
movq [r7+16], xmm0
lea r0, [r0+r4]
VAA_AVG_BLOCK_SSE2 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5
movq [r7+24], xmm0
movdqa xmm0, [r7] ; block 0~7
movdqa xmm1, [r7+16] ; block 8~15
movdqa xmm2, xmm0
; Low word of xmm0 = sum of all sixteen averages.
paddw xmm0, xmm1
SUM_WORD_8x2_SSE2 xmm0, xmm3
; Square every average, widen to dwords and reduce: xmm1 = sum of squares
; (the same total ends up in every dword lane).
pmullw xmm1, xmm1
pmullw xmm2, xmm2
movdqa xmm3, xmm1
movdqa xmm4, xmm2
punpcklwd xmm1, xmm7
punpckhwd xmm3, xmm7
punpcklwd xmm2, xmm7
punpckhwd xmm4, xmm7
paddd xmm1, xmm2
paddd xmm3, xmm4
paddd xmm1, xmm3
pshufd xmm2, xmm1, 01Bh
paddd xmm1, xmm2
pshufd xmm2, xmm1, 0B1h
paddd xmm1, xmm2
movd r2d, xmm0
and r2, 0ffffh ; only the low word holds the sum; mask off the rest
; r2 = (sum * sum) >> 4
mov r3, r2
imul r2, r3
sar r2, $04
; return value = sum of squares - (sum^2 >> 4)
movd retrd, xmm1
sub retrd, r2d
; Release the scratch area and undo the alignment adjustment.
add r7,32
add r7,r5
%ifdef X86_32
pop r6
pop r5
pop r4
pop r3
%endif
POP_XMM
ret
;***********************************************************************
; int32_t AnalysisVaaInfoIntra_ssse3( uint8_t *pDataY, const int32_t iLineSize );
;
; SSSE3 variant: splits the 16x16 block at pDataY into sixteen 4x4 sub-blocks,
; takes the average of each (sum >> 4, via VAA_AVG_BLOCK_SSSE3), and returns
;   sum(avg[i]^2) - ((sum(avg[i]))^2 >> 4)
; i.e. 16 times the variance of the sixteen averages.
; The lines are read with aligned 16-byte loads (movdqa), so pDataY and
; iLineSize must keep every line 16-byte aligned.
; Register use (LOAD_2_PARA is defined in asm_inc.asm): r0 = pDataY,
; r1 = iLineSize; r2/r3/r4 = 2x/3x/4x iLineSize; r5 = stack alignment slack.
;***********************************************************************
WELS_EXTERN AnalysisVaaInfoIntra_ssse3
%assign push_num 0
LOAD_2_PARA
PUSH_XMM 8
SIGN_EXTENSION r1,r1d
%ifdef X86_32
push r3
push r4
push r5
push r6
%assign push_num push_num+4
%endif
; Carve out a 16-byte-aligned, 32-byte scratch area at r7 (assumed to be the
; stack pointer in asm_inc.asm's register naming - TODO confirm); r5 keeps
; the alignment slack so the adjustment can be undone before returning.
mov r5,r7
and r5,0fh
sub r7,r5
sub r7,32
mov r2,r1
sal r2,$01 ;r2 = 2*iLineSize
mov r3,r2
add r3,r1 ;r3 = 3*iLineSize
mov r4,r2
sal r4,$01 ;r4 = 4*iLineSize
pxor xmm7, xmm7
; loops
; Four 16x4 strips, each giving four 4x4 averages (8 bytes) in the scratch
; area. Successive strips alternate between xmm0 and xmm1 as destination.
VAA_AVG_BLOCK_SSSE3 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5
movq [r7],xmm0
lea r0,[r0+r4]
VAA_AVG_BLOCK_SSSE3 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6
movq [r7+8],xmm1
lea r0,[r0+r4]
VAA_AVG_BLOCK_SSSE3 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5
movq [r7+16],xmm0
lea r0,[r0+r4]
VAA_AVG_BLOCK_SSSE3 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6
movq [r7+24],xmm1
; xmm0 = averages of blocks 0-7, xmm1 = averages of blocks 8-15.
movdqa xmm0,[r7]
movdqa xmm1,[r7+16]
movdqa xmm2, xmm0
; Low word of xmm0 = sum of all sixteen averages.
paddw xmm0, xmm1
SUM_WORD_8x2_SSE2 xmm0, xmm3 ; better performance than that of phaddw sets
; Square every average, widen to dwords and reduce: xmm1 = sum of squares
; (the same total ends up in every dword lane).
pmullw xmm1, xmm1
pmullw xmm2, xmm2
movdqa xmm3, xmm1
movdqa xmm4, xmm2
punpcklwd xmm1, xmm7
punpckhwd xmm3, xmm7
punpcklwd xmm2, xmm7
punpckhwd xmm4, xmm7
paddd xmm1, xmm2
paddd xmm3, xmm4
paddd xmm1, xmm3
pshufd xmm2, xmm1, 01Bh
paddd xmm1, xmm2
pshufd xmm2, xmm1, 0B1h
paddd xmm1, xmm2
movd r2d, xmm0
and r2, 0ffffh ; only the low word holds the sum; mask off the rest
; r2 = (sum * sum) >> 4
mov r3, r2
imul r2, r3
sar r2, $04
; return value = sum of squares - (sum^2 >> 4)
movd retrd, xmm1
sub retrd, r2d
; Release the scratch area and undo the alignment adjustment.
add r7,32
add r7,r5
%ifdef X86_32
pop r6
pop r5
pop r4
pop r3
%endif
POP_XMM
ret
;***********************************************************************
; uint8_t MdInterAnalysisVaaInfo_sse41( int32_t *pSad8x8 )
;
; pSad8x8 points at four 32-bit SAD values, read with an aligned 16-byte load.
; Computes avg = (sum of the four) >> 2 and the sum of squared differences
; ((sad[i] >> 6) - (avg >> 6))^2.
;   - If that sum is below 20 (unsigned compare), returns 15.
;   - Otherwise returns a 4-bit mask of "sad[i] > avg", with the lanes reversed:
;     bit 0 corresponds to pSad8x8[3], bit 3 to pSad8x8[0].
;***********************************************************************
WELS_EXTERN MdInterAnalysisVaaInfo_sse41
%assign push_num 0
LOAD_1_PARA
movdqa xmm0,[r0]
; Reverse-and-add, then swap-neighbours-and-add: every lane of xmm1 = total.
pshufd xmm1, xmm0, 01Bh
paddd xmm1, xmm0
pshufd xmm2, xmm1, 0B1h
paddd xmm1, xmm2
psrad xmm1, 02h ; iAverageSad
movdqa xmm2, xmm1
psrad xmm2, 06h
movdqa xmm3, xmm0 ; iSadBlock
psrad xmm3, 06h
psubd xmm3, xmm2
pmulld xmm3, xmm3 ; [comment]: pmulld from SSE4.1 instruction sets
; Sum the four squared differences into the low lane of xmm3.
pshufd xmm4, xmm3, 01Bh
paddd xmm4, xmm3
pshufd xmm3, xmm4, 0B1h
paddd xmm3, xmm4
movd r0d, xmm3
cmp r0d, 20 ; INTER_VARIANCE_SAD_THRESHOLD
jb near .threshold_exit
; Reverse the lane order, compare against the average, collect the sign bits.
pshufd xmm0, xmm0, 01Bh
pcmpgtd xmm0, xmm1 ; iSadBlock > iAverageSad
movmskps retrd, xmm0
ret
.threshold_exit:
; Variance below the threshold: report all four bits set.
mov retrd, 15
ret
;***********************************************************************
; uint8_t MdInterAnalysisVaaInfo_sse2( int32_t *pSad8x8 )
;
; SSE2-only variant of the routine above it (no pmulld available).
; pSad8x8 points at four 32-bit SAD values, read with an aligned 16-byte load.
; Computes avg = (sum of the four) >> 2 and the sum of squared differences
; ((sad[i] >> 6) - (avg >> 6))^2.
;   - If that sum is below 20 (unsigned compare), returns 15.
;   - Otherwise returns a 4-bit mask of "sad[i] > avg", with the lanes reversed:
;     bit 0 corresponds to pSad8x8[3], bit 3 to pSad8x8[0].
;***********************************************************************
WELS_EXTERN MdInterAnalysisVaaInfo_sse2
%assign push_num 0
LOAD_1_PARA
movdqa xmm0, [r0]
; Reverse-and-add, then swap-neighbours-and-add: every lane of xmm1 = total.
pshufd xmm1, xmm0, 01Bh
paddd xmm1, xmm0
pshufd xmm2, xmm1, 0B1h
paddd xmm1, xmm2
psrad xmm1, 02h ; iAverageSad
movdqa xmm2, xmm1
psrad xmm2, 06h
movdqa xmm3, xmm0 ; iSadBlock
psrad xmm3, 06h
psubd xmm3, xmm2
; to replace pmulld functionality as below
; pmuludq squares lanes 0 and 2 into 64-bit results; the shuffled copy does
; the same for lanes 1 and 3. The unpacks then gather the low 32 bits of the
; four products back into lane order in xmm5.
movdqa xmm2, xmm3
pmuludq xmm2, xmm3
pshufd xmm4, xmm3, 0B1h
pmuludq xmm4, xmm4
movdqa xmm5, xmm2
punpckldq xmm5, xmm4
punpckhdq xmm2, xmm4
punpcklqdq xmm5, xmm2
; Sum the four squared differences into the low lane of xmm5.
pshufd xmm4, xmm5, 01Bh
paddd xmm4, xmm5
pshufd xmm5, xmm4, 0B1h
paddd xmm5, xmm4
movd r0d, xmm5
cmp r0d, 20 ; INTER_VARIANCE_SAD_THRESHOLD
jb near .threshold_exit
; Reverse the lane order, compare against the average, collect the sign bits.
pshufd xmm0, xmm0, 01Bh
pcmpgtd xmm0, xmm1 ; iSadBlock > iAverageSad
movmskps retrd, xmm0
ret
.threshold_exit:
; Variance below the threshold: report all four bits set.
mov retrd, 15
ret

View File

@ -0,0 +1,129 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifdef HAVE_NEON
#include "arm_arch_common_macro.S"
// ROW_TRANSFORM_1_STEP: first butterfly stage of the row transform.
//   arg0-arg3: inputs src[0]..src[3] as D registers of four int16 lanes
//   arg4-arg7: outputs e[0]..e[3] as Q registers of four int32 lanes
//   arg8-arg9: working D registers (clobbered) holding src[1]>>1 and src[3]>>1
.macro ROW_TRANSFORM_1_STEP arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9
// { // input: src_d[0]~[3], output: e_q[0]~[3]; working: $8 $9
vaddl.s16 \arg4, \arg0, \arg2 //int32 e[i][0] = src[0] + src[2];
vsubl.s16 \arg5, \arg0, \arg2 //int32 e[i][1] = src[0] - src[2];
vshr.s16 \arg8, \arg1, #1
vshr.s16 \arg9, \arg3, #1
vsubl.s16 \arg6, \arg8, \arg3 //int32 e[i][2] = (src[1]>>1)-src[3];
vaddl.s16 \arg7, \arg1, \arg9 //int32 e[i][3] = src[1] + (src[3]>>1);
// }
.endm
// TRANSFORM_4BYTES: second butterfly stage, shared by the row and column passes.
//   arg0-arg3: outputs f[0]..f[3] (Q registers, 32-bit lanes)
//   arg4-arg7: inputs  e[0]..e[3] (Q registers, 32-bit lanes)
.macro TRANSFORM_4BYTES arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 // both row & col transform used
// { // output: f_q[0]~[3], input: e_q[0]~[3];
vadd.s32 \arg0, \arg4, \arg7 //int16 f[i][0] = e[i][0] + e[i][3];
vadd.s32 \arg1, \arg5, \arg6 //int16 f[i][1] = e[i][1] + e[i][2];
vsub.s32 \arg2, \arg5, \arg6 //int16 f[i][2] = e[i][1] - e[i][2];
vsub.s32 \arg3, \arg4, \arg7 //int16 f[i][3] = e[i][0] - e[i][3];
// }
.endm
// COL_TRANSFORM_1_STEP: first butterfly stage of the column transform.
//   arg0-arg3: inputs  f[0]..f[3] (Q registers, 32-bit lanes)
//   arg4-arg7: outputs e[0]..e[3] (Q registers, 32-bit lanes); arg6/arg7 first
//              hold the halved f[1]/f[3] and are then overwritten in place.
.macro COL_TRANSFORM_1_STEP arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7
// { // input: src_q[0]~[3], output: e_q[0]~[3];
vadd.s32 \arg4, \arg0, \arg2 //int32 e[0][j] = f[0][j] + f[2][j];
vsub.s32 \arg5, \arg0, \arg2 //int32 e[1][j] = f[0][j] - f[2][j];
vshr.s32 \arg6, \arg1, #1
vshr.s32 \arg7, \arg3, #1
vsub.s32 \arg6, \arg6, \arg3 //int32 e[2][j] = (f[1][j]>>1) - f[3][j];
vadd.s32 \arg7, \arg1, \arg7 //int32 e[3][j] = f[1][j] + (f[3][j]>>1);
// }
.endm
// IdctResAddPred_neon( uint8_t *pred, const int32_t stride, int16_t *rs )
//   r0 = pred, r1 = stride, r2 = rs
// Runs the 4x4 inverse transform (row pass, transpose, column pass) on the 16
// int16 coefficients at rs, rounds the result with a right shift by 6, adds it
// to the 4x4 block of bytes at pred (rows 'stride' apart), saturates to 0..255
// and writes the block back to pred.
WELS_ASM_FUNC_BEGIN IdctResAddPred_neon
// De-interleaving load: d0..d3 receive every 4th coefficient, i.e. d[k] holds
// src[k] of all four rows.
vld4.s16 {d0, d1, d2, d3}, [r2] // cost 3 cycles!
ROW_TRANSFORM_1_STEP d0, d1, d2, d3, q8, q9, q10, q11, d4, d5
TRANSFORM_4BYTES q0, q1, q2, q3, q8, q9, q10, q11
// transform element 32bits
// 4x4 transpose of the 32-bit elements so the second pass works on columns.
vtrn.s32 q0, q1 //[0 1 2 3]+[4 5 6 7]-->[0 4 2 6]+[1 5 3 7]
vtrn.s32 q2, q3 //[8 9 10 11]+[12 13 14 15]-->[8 12 10 14]+[9 13 11 15]
vswp d1, d4 //[0 4 2 6]+[8 12 10 14]-->[0 4 8 12]+[2 6 10 14]
vswp d3, d6 //[1 5 3 7]+[9 13 11 15]-->[1 5 9 13]+[3 7 11 15]
COL_TRANSFORM_1_STEP q0, q1, q2, q3, q8, q9, q10, q11
TRANSFORM_4BYTES q0, q1, q2, q3, q8, q9, q10, q11
//after clip_table[MAX_NEG_CROP] into [0, 255]
// Keep the original pred pointer in r2 for the store; r0 is advanced by the loads.
mov r2, r0
// Load the four 4-byte prediction rows: rows 0,1 -> d20, rows 2,3 -> d22.
vld1.32 {d20[0]},[r0],r1
vld1.32 {d20[1]},[r0],r1
vld1.32 {d22[0]},[r0],r1
vld1.32 {d22[1]},[r0]
// Rounding shift right by 6 and narrow the residual to int16 (q8 = rows 0,1; q9 = rows 2,3).
vrshrn.s32 d16, q0, #6
vrshrn.s32 d17, q1, #6
vrshrn.s32 d18, q2, #6
vrshrn.s32 d19, q3, #6
// Widen the prediction to 16 bits, add the residual, saturate back to u8.
vmovl.u8 q0,d20
vmovl.u8 q1,d22
vadd.s16 q0,q8
vadd.s16 q1,q9
vqmovun.s16 d20,q0
vqmovun.s16 d22,q1
vst1.32 {d20[0]},[r2],r1
vst1.32 {d20[1]},[r2],r1
vst1.32 {d22[0]},[r2],r1
vst1.32 {d22[1]},[r2]
WELS_ASM_FUNC_END
// Zero-fills 16 rows of 32 bytes each, starting at r0.
//   r0 = block pointer, r1 = row stride (doubled below before use as a byte step).
// NOTE(review): 32 bytes per row and the doubled stride are consistent with a
// 16x16 block of int16 whose stride is counted in elements - confirm with caller.
WELS_ASM_FUNC_BEGIN WelsBlockZero16x16_neon
veor q0, q0
veor q1, q1
lsl r1, r1, 1
.rept 16
vst1.64 {q0, q1}, [r0], r1
.endr
WELS_ASM_FUNC_END
// Zero-fills 8 rows of 16 bytes each, starting at r0.
//   r0 = block pointer, r1 = row stride (doubled below before use as a byte step).
// NOTE(review): 16 bytes per row and the doubled stride are consistent with an
// 8x8 block of int16 whose stride is counted in elements - confirm with caller.
WELS_ASM_FUNC_BEGIN WelsBlockZero8x8_neon
veor q0, q0
lsl r1, r1, 1
.rept 8
vst1.64 {q0}, [r0], r1
.endr
WELS_ASM_FUNC_END
#endif

View File

@ -0,0 +1,635 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifdef HAVE_NEON
//Global macro
#include "arm_arch_common_macro.S"
//Global macro
// GET_8BYTE_DATA dreg, ptr, step
// Gathers eight single bytes into lanes 0..7 of D register arg0, reading from
// address arg1 and post-incrementing arg1 by arg2 after every byte (so arg1
// ends up 8*arg2 past its starting value). Used to collect a vertical column.
.macro GET_8BYTE_DATA arg0, arg1, arg2
vld1.8 {\arg0[0]}, [\arg1], \arg2
vld1.8 {\arg0[1]}, [\arg1], \arg2
vld1.8 {\arg0[2]}, [\arg1], \arg2
vld1.8 {\arg0[3]}, [\arg1], \arg2
vld1.8 {\arg0[4]}, [\arg1], \arg2
vld1.8 {\arg0[5]}, [\arg1], \arg2
vld1.8 {\arg0[6]}, [\arg1], \arg2
vld1.8 {\arg0[7]}, [\arg1], \arg2
.endm
// 16x16 luma vertical prediction: replicates the 16-byte row directly above the
// block into all 16 rows of the block.
//   r0 = top-left of the block to fill, r1 = row stride in bytes
//   (as used by the addressing below; the C prototype is not visible here).
WELS_ASM_FUNC_BEGIN WelsDecoderI16x16LumaPredV_neon
//Get the top line data to 'q0'
sub r2, r0, r1
vldm r2, {d0, d1}
mov r2, r0
mov r3, #4
//Set the top line to the each line of MB(16*16)
// 4 iterations x 4 rows per iteration = 16 rows.
loop_0_get_i16x16_luma_pred_v:
vst1.8 {d0,d1}, [r2], r1
vst1.8 {d0,d1}, [r2], r1
vst1.8 {d0,d1}, [r2], r1
vst1.8 {d0,d1}, [r2], r1
subs r3, #1
bne loop_0_get_i16x16_luma_pred_v
WELS_ASM_FUNC_END
// 16x16 luma horizontal prediction: fills each of the 16 rows with the byte
// immediately to the left of that row.
//   r0 = top-left of the block to fill, r1 = row stride in bytes
//   (as used by the addressing below; the C prototype is not visible here).
WELS_ASM_FUNC_BEGIN WelsDecoderI16x16LumaPredH_neon
// r2 walks down the column one byte to the left of the block.
sub r2, r0, #1
mov r3, #4
// 4 iterations x 4 rows per iteration = 16 rows.
loop_0_get_i16x16_luma_pred_h:
//Get one byte data from left side
// Each load reads one byte and replicates it to all 16 lanes of a Q register.
vld1.8 {d0[],d1[]}, [r2], r1
vld1.8 {d2[],d3[]}, [r2], r1
vld1.8 {d4[],d5[]}, [r2], r1
vld1.8 {d6[],d7[]}, [r2], r1
//Set the line of MB using the left side byte data
vst1.8 {d0,d1}, [r0], r1
vst1.8 {d2,d3}, [r0], r1
vst1.8 {d4,d5}, [r0], r1
vst1.8 {d6,d7}, [r0], r1
subs r3, #1
bne loop_0_get_i16x16_luma_pred_h
WELS_ASM_FUNC_END
// 16x16 luma DC prediction using both neighbours: fills the block with
// (sum of the 16 left bytes + sum of the 16 top bytes + 16) >> 5.
//   r0 = top-left of the block to fill, r1 = row stride in bytes
//   (as used by the addressing below; the C prototype is not visible here).
WELS_ASM_FUNC_BEGIN WelsDecoderI16x16LumaPredDc_neon
//stmdb sp!, { r2-r5, lr}
//Get the left vertical line data
sub r2, r0, #1
GET_8BYTE_DATA d0, r2, r1
GET_8BYTE_DATA d1, r2, r1
//Get the top horizontal line data
sub r2, r0, r1
vldm r2, {d2, d3}
//Calculate the sum of top horizontal line data and vertical line data
// Pairwise-widening adds reduce the 32 bytes to a single total in d0.
vpaddl.u8 q0, q0
vpaddl.u8 q1, q1
vadd.u16 q0, q0, q1
vadd.u16 d0, d0, d1
vpaddl.u16 d0, d0
vpaddl.u32 d0, d0
//Calculate the mean value
// Rounding shift by 5 divides the 32-sample total by 32.
vrshr.u16 d0, d0, #5
vdup.8 q0, d0[0]
//Set the mean value to the all of member of MB
// 4 iterations x 4 rows per iteration = 16 rows.
mov r2, #4
loop_0_get_i16x16_luma_pred_dc_both:
vst1.8 {d0,d1}, [r0], r1
vst1.8 {d0,d1}, [r0], r1
vst1.8 {d0,d1}, [r0], r1
vst1.8 {d0,d1}, [r0], r1
subs r2, #1
bne loop_0_get_i16x16_luma_pred_dc_both
WELS_ASM_FUNC_END
.align 3
//The table for SIMD instruction {(8,7,6,5,4,3,2,1) * 5}
// Stored as bytes 40,35,30,25,20,15,10,5 (little-endian words).
CONST0_GET_I16X16_LUMA_PRED_PLANE: .long 0x191e2328, 0x050a0f14
//The table for SIMD instruction {-7,-6,-5,-4,-3,-2,-1,0}
// Stored as signed bytes.
CONST1_GET_I16X16_LUMA_PRED_PLANE: .long 0xfcfbfaf9, 0x00fffefd
// 16x16 luma plane prediction. From the top row and left column it derives
//   a = 16 * (top[15] + left[15])
//   b = (5 * H + 32) >> 6,  c = (5 * V + 32) >> 6
// where H / V are the weighted differences of the top / left neighbours, and
// then writes pred[y][x] = clip((a + b*(x-7) + c*(y-7) + 16) >> 5) for
// x, y = 0..15.
//   r0 = top-left of the block to fill, r1 = row stride in bytes
//   (as used by the addressing below; the C prototype is not visible here).
WELS_ASM_FUNC_BEGIN WelsDecoderI16x16LumaPredPlane_neon
//stmdb sp!, { r2-r5, lr}
//Load the table {(8,7,6,5,4,3,2,1) * 5}
adr r2, CONST0_GET_I16X16_LUMA_PRED_PLANE
vldr d0, [r2]
//Pack the top[-1] ~ top[6] to d1
sub r2, r0, r1
sub r3, r2, #1
vld1.8 d1, [r3]
//Pack the top[8] ~ top[15] to d2
add r3, #9
vld1.8 d2, [r3]
//Save the top[15] to d6 for next step
vdup.u8 d6, d2[7]
//Get and pack left[-1] ~ left[6] to d4
sub r3, r2, #1
GET_8BYTE_DATA d4, r3, r1
//Get and pack left[8] ~ left[15] to d3
// The extra step skips left[7], which the plane formula does not use.
add r3, r1
GET_8BYTE_DATA d3, r3, r1
//Save the left[15] to d7 for next step
vdup.u8 d7, d3[7]
//reverse the byte order of d2,d3
vrev64.8 q1, q1
vsubl.u8 q2, d3, d4 //q2={left[8]-left[6],left[9]-left[5],left[10]-left[4], ...}
vsubl.u8 q1, d2, d1 //q1={top[8]-top[6],top[9]-top[5],top[10]-top[4], ...}
vmovl.u8 q0, d0
vmul.s16 q1, q0, q1 //q1 = q1*{(8,7,6,5,4,3,2,1) * 5}
vmul.s16 q2, q0, q2 //q2 = q2*{(8,7,6,5,4,3,2,1) * 5}
//Calculate the sum of items of q1, q2
vpadd.s16 d0, d2, d3
vpadd.s16 d1, d4, d5
vpaddl.s16 q0, q0
vpaddl.s32 q0, q0
//Get the value of 'b', 'c' and extend to q1, q2.
vrshr.s64 q0, #6
vdup.s16 q1, d0[0]
vdup.s16 q2, d1[0]
//Load the table {-7,-6,-5,-4,-3,-2,-1,0} to d0
adr r2, CONST1_GET_I16X16_LUMA_PRED_PLANE
vld1.32 {d0}, [r2]
//Get the value of 'a' and save to q3
vaddl.u8 q3, d6, d7
vshl.u16 q3, #4
//calculate a+'b'*{-7,-6,-5,-4,-3,-2,-1,0} + c*{-7}
vmovl.s8 q0, d0
vmla.s16 q3, q0, q1
vmla.s16 q3, q2, d0[0]
//Calculate a+'b'*{1,2,3,4,5,6,7,8} + c*{-7}
// q8 = q3 + 8*b: the right half of the row is the left half shifted by 8 columns.
vshl.s16 q8, q1, #3
vadd.s16 q8, q3
//right shift 5 bits and rounding
vqrshrun.s16 d0, q3, #5
vqrshrun.s16 d1, q8, #5
//Set the line of MB
vst1.u32 {d0,d1}, [r0], r1
//Do the same processing for setting other lines
// Each further row adds 'c' once to both halves; 15 rows remain.
mov r2, #15
loop_0_get_i16x16_luma_pred_plane:
vadd.s16 q3, q2
vadd.s16 q8, q2
vqrshrun.s16 d0, q3, #5
vqrshrun.s16 d1, q8, #5
vst1.u32 {d0,d1}, [r0], r1
subs r2, #1
bne loop_0_get_i16x16_luma_pred_plane
WELS_ASM_FUNC_END
// 4x4 luma vertical prediction: copies the 4 bytes directly above the block
// into each of the block's 4 rows, using plain 32-bit ldr/str.
//   r0 = top-left of the block to fill, r1 = row stride in bytes
//   (as used by the addressing below; the C prototype is not visible here).
WELS_ASM_FUNC_BEGIN WelsDecoderI4x4LumaPredV_neon
//stmdb sp!, { r2-r5, lr}
//Load the top row (4 bytes)
sub r2, r0, r1
ldr r2, [r2]
//Set the luma MB using top line
str r2, [r0], r1
str r2, [r0], r1
str r2, [r0], r1
str r2, [r0]
WELS_ASM_FUNC_END
// 4x4 luma horizontal prediction: fills each of the 4 rows with the byte
// immediately to the left of that row.
//   r0 = top-left of the block to fill, r1 = row stride in bytes
//   (as used by the addressing below; the C prototype is not visible here).
WELS_ASM_FUNC_BEGIN WelsDecoderI4x4LumaPredH_neon
//stmdb sp!, { r2-r5, lr}
//Load the left column (4 bytes)
// Each load reads one byte and replicates it to every lane of the D register.
sub r2, r0, #1
vld1.8 {d0[]}, [r2], r1
vld1.8 {d1[]}, [r2], r1
vld1.8 {d2[]}, [r2], r1
vld1.8 {d3[]}, [r2]
//Set the luma MB using the left side byte
// Only the low 32 bits (4 pixels) of each register are stored.
vst1.32 {d0[0]}, [r0], r1
vst1.32 {d1[0]}, [r0], r1
vst1.32 {d2[0]}, [r0], r1
vst1.32 {d3[0]}, [r0]
WELS_ASM_FUNC_END
// 4x4 luma diagonal-down-left prediction. Reads the 8 bytes t0..t7 above the
// block, forms the 3-tap filtered values (t[i] + 2*t[i+1] + t[i+2] + 2) >> 2
// (with t7 repeated past the end), and writes four rows, each one a 4-byte
// window into that filtered sequence shifted one position further.
//   r0 = top-left of the block to fill, r1 = row stride in bytes
//   (as used by the addressing below; the C prototype is not visible here).
WELS_ASM_FUNC_BEGIN WelsDecoderI4x4LumaPredDDL_neon
//stmdb sp!, { r2-r5, lr}
//Load the top row data(8 bytes)
sub r2, r0, r1
vld1.32 {d0}, [r2]
//For "t7 + (t7<<1)"
vdup.8 d1, d0[7]
//calculate "t0+t1,t1+t2,t2+t3...t6+t7,t7+t7"
vext.8 d1, d0, d1, #1
vaddl.u8 q1, d1, d0
//calculate "x,t0+t1+t1+t2,t1+t2+t2+t3,...t5+t6+t6+t7,t6+t7+t7+t7"
vext.8 q2, q1, q1, #14
vadd.u16 q0, q1, q2
//right shift 2 bits and rounding
vqrshrn.u16 d0, q0, #2
//Save "ddl0, ddl1, ddl2, ddl3"
// Lane 0 of d0 is the unused "x" entry, hence the extract offsets start at 1.
vext.8 d1, d0, d0, #1
vst1.32 d1[0], [r0], r1
//Save "ddl1, ddl2, ddl3, ddl4"
vext.8 d1, d0, d0, #2
vst1.32 d1[0], [r0], r1
//Save "ddl2, ddl3, ddl4, ddl5"
vext.8 d1, d0, d0, #3
vst1.32 d1[0], [r0], r1
//Save "ddl3, ddl4, ddl5, ddl6"
vst1.32 d0[1], [r0]
WELS_ASM_FUNC_END
// 4x4 luma diagonal-down-right prediction. Packs the neighbours into one
// sequence L3,L2,L1,L0,LT,T0,T1,T2,T3 (left column bottom-up, top-left corner,
// top row), applies the 3-tap (1,2,1) filter with rounding, and writes four
// rows, each one a 4-byte window into the filtered sequence shifted by one.
//   r0 = top-left of the block to fill, r1 = row stride in bytes
//   (as used by the addressing below; the C prototype is not visible here).
WELS_ASM_FUNC_BEGIN WelsDecoderI4x4LumaPredDDR_neon
//stmdb sp!, { r2-r5, lr}
//Load the top row (4 bytes)
sub r2, r0, r1
vld1.32 {d0[1]}, [r2]
//Load the left column (5 bytes)
// Starts at the top-left corner (LT) and walks down: LT, L0, L1, L2, then L3.
sub r2, #1
vld1.8 {d0[3]}, [r2], r1
vld1.8 {d0[2]}, [r2], r1
vld1.8 {d0[1]}, [r2], r1
vld1.8 {d0[0]}, [r2], r1
vld1.8 {d1[7]}, [r2] //For packing the right sequence to do SIMD processing
vext.8 d2, d1, d0, #7 //d0:{L2,L1,L0,LT,T0,T1,T2,T3}
//d2:{L3,L2,L1,L0,LT,T0,T1,T2}
//q2:{L2+L3,L1+L2,L0+L1...T1+T2,T2+T3}
vaddl.u8 q2, d2, d0
//q1:{TL0+LT0,LT0+T01,...L12+L23}
// q3 is not initialised here; only its top 2 bytes enter lane 0 of the result,
// and that lane is never stored (every store below reads lanes 1-7 of d0).
vext.8 q3, q3, q2, #14
vadd.u16 q1, q2, q3
//right shift 2 bits and rounding
vqrshrn.u16 d0, q1, #2
//Adjust the data sequence for setting luma MB of 'pred'
// Each vext rotates d0 by one byte so the next row's window lands in d0[1].
vst1.32 d0[1], [r0], r1
vext.8 d0, d0, d0, #7
vst1.32 d0[1], [r0], r1
vext.8 d0, d0, d0, #7
vst1.32 d0[1], [r0], r1
vext.8 d0, d0, d0, #7
vst1.32 d0[1], [r0]
WELS_ASM_FUNC_END
// 4x4 luma vertical-left prediction. From the 8 bytes above the block it
// builds the 2-tap averages (t[i]+t[i+1]+1)>>1 and the 3-tap values
// (t[i]+2*t[i+1]+t[i+2]+2)>>2; rows 0 and 2 are taken from the 2-tap set,
// rows 1 and 3 from the 3-tap set, with rows 2/3 shifted one position.
//   r0 = top-left of the block to fill, r1 = row stride in bytes
//   (as used by the addressing below; the C prototype is not visible here).
WELS_ASM_FUNC_BEGIN WelsDecoderI4x4LumaPredVL_neon
//stmdb sp!, { r2-r5, lr}
//Load the top row (8 bytes)
sub r2, r0, r1
vld1.32 {d0}, [r2]
vext.8 d1, d0, d0, #1
vaddl.u8 q1, d1, d0 //q1:{t0+t1,t1+t2,t2+t3...t5+t6,x,x}
vext.8 q2, q1, q1, #2
vadd.u16 q2, q1, q2 //q2:{t0+t1+t1+t2,t1+t2+t2+t3,...t4+t5+t5+t6,x,x}
//calculate the "vl0,vl1,vl2,vl3,vl4"
vqrshrn.u16 d0, q1, #1
//calculate the "vl5,vl6,vl7,vl8,vl9"
vqrshrn.u16 d1, q2, #2
//Adjust the data sequence for setting the luma MB
vst1.32 d0[0], [r0], r1
vst1.32 d1[0], [r0], r1
// Shift both sequences by one byte for rows 2 and 3.
vext.8 d0, d0, d0, #1
vext.8 d1, d1, d1, #1
vst1.32 d0[0], [r0], r1
vst1.32 d1[0], [r0]
WELS_ASM_FUNC_END
// 4x4 luma vertical-right prediction. Packs L2,L1,L0,LT,T0,T1,T2,T3 into d0,
// builds the 2-tap averages (d0) and the 3-tap (1,2,1) values (d1), then
// assembles the four rows from those two sequences.
//   r0 = top-left of the block to fill, r1 = row stride in bytes
//   (as used by the addressing below; the C prototype is not visible here).
WELS_ASM_FUNC_BEGIN WelsDecoderI4x4LumaPredVR_neon
//stmdb sp!, { r2-r5, lr}
//Load the top row (4 bytes)
sub r2, r0, r1
vld1.32 {d0[1]}, [r2]
//Load the left column (4 bytes)
// Starts at the top-left corner (LT) and walks down: LT, L0, L1, L2.
sub r2, #1
vld1.8 {d0[3]}, [r2], r1
vld1.8 {d0[2]}, [r2], r1
vld1.8 {d0[1]}, [r2], r1
vld1.8 {d0[0]}, [r2]
vext.8 d1, d0, d0, #7
vaddl.u8 q1, d0, d1 //q1:{X,L2+L1,L1+L0,L0+LT,LT+T0,T0+T1,T1+T2,T2+T3}
vext.u8 q2, q1, q1, #14
vadd.u16 q2, q2, q1 //q2:{X,L2+L1+L1+L0,L1+L0+L0+LT,...T1+T2+T2+T3}
//Calculate the vr0 ~ vr9
vqrshrn.u16 d1, q2, #2
vqrshrn.u16 d0, q1, #1
//Adjust the data sequence for setting the luma MB
// Rows 0 and 1 are the upper 4 bytes of the 2-tap and 3-tap sequences.
vst1.32 d0[1], [r0], r1
vst1.32 d1[1], [r0], r1
// Rows 2 and 3 are written piecewise (1 + 2 + 1 bytes) through r0 and r2;
// the '!' forms advance the pointer by the size of each store.
add r2, r0, r1
vst1.8 d1[3], [r0]!
vst1.16 d0[2], [r0]!
vst1.8 d0[6], [r0]!
vst1.8 d1[2], [r2]!
vst1.16 d1[2], [r2]!
vst1.8 d1[6], [r2]
WELS_ASM_FUNC_END
// 4x4 luma horizontal-up prediction. Uses only the left column L0..L3:
// builds the 2-tap averages and 3-tap (1,2,1) values down the column (with L3
// repeated past the end), interleaves them, and writes four rows; the last
// row is L3 replicated.
//   r0 = top-left of the block to fill, r1 = row stride in bytes
//   (as used by the addressing below; the C prototype is not visible here).
WELS_ASM_FUNC_BEGIN WelsDecoderI4x4LumaPredHU_neon
//stmdb sp!, { r2-r5, lr}
//Load the left column data
// r3 = address of L3 (three rows below L0); it is broadcast to every lane first.
sub r2, r0, #1
mov r3, #3
mul r3, r1
add r3, r2
vld1.8 {d0[]}, [r3]
vld1.8 {d0[4]}, [r2], r1
vld1.8 {d0[5]}, [r2], r1
vld1.8 {d0[6]}, [r2], r1 //d0:{L3,L3,L3,L3,L0,L1,L2,L3}
vext.8 d1, d0, d0, #1
vaddl.u8 q2, d0, d1 //q2:{L3+L3,L3+L3,L3+L3,L3+L0,L0+L1,L1+L2,L2+L3,L3+L3}
vext.u8 d2, d5, d4, #2
vadd.u16 d3, d2, d5 //d3:{L0+L1+L1+L2,L1+L2+L2+L3,L2+L3+L3+L3,L3+L3+L3+L3}
//Calculate the hu0 ~ hu5
vqrshrn.u16 d2, q2, #1
vqrshrn.u16 d1, q1, #2
//Adjust the data sequence for setting the luma MB
vzip.8 d2, d1
vst1.32 d1[0], [r0], r1
vext.8 d2, d1, d1, #2
vst1.32 d2[0], [r0], r1
vst1.32 d1[1], [r0], r1
// Last row: d0[0] still holds four copies of L3.
vst1.32 d0[0], [r0]
WELS_ASM_FUNC_END
// 4x4 luma horizontal-down prediction. Packs L3,L2,L1,L0,LT,T0,T1,T2 into d0,
// builds the 2-tap averages and the 3-tap (1,2,1) values, and writes the four
// rows with interleaving 16-bit stores (vst2.16).
//   r0 = top-left of the block to fill, r1 = row stride in bytes
//   (as used by the addressing below; the C prototype is not visible here).
WELS_ASM_FUNC_BEGIN WelsDecoderI4x4LumaPredHD_neon
//stmdb sp!, { r2-r5, lr}
//Load the data
// r2 starts at the top-left corner: the 32-bit load takes LT,T0,T1,T2, then the
// byte loads walk down the left column.
sub r2, r0, r1
sub r2, #1
vld1.32 {d0[1]}, [r2], r1
vld1.8 {d0[3]}, [r2], r1
vld1.8 {d0[2]}, [r2], r1
vld1.8 {d0[1]}, [r2], r1
vld1.8 {d0[0]}, [r2] //d0:{L3,L2,L1,L0,LT,T0,T1,T2}
vext.8 d1, d0, d0, #7
vaddl.u8 q1, d0, d1 //q1:{x,L3+L2,L2+L1,L1+L0,L0+LT,LT+T0,T0+T1,T1+T2}
vext.u8 q2, q1, q1, #14 //q2:{x,x, L3+L2,L2+L1,L1+L0,L0+LT,LT+T0,T0+T1}
vadd.u16 q3, q2, q1 //q3:{x,x,L3+L2+L2+L1,L2+L1+L1+L0,L1+L0+L0+LT,L0+LT+LT+T0,LT+T0+T0+T1,T0+T1+T1+T2}
//Calculate the hd0~hd9
vqrshrn.u16 d1, q3, #2
vqrshrn.u16 d0, q2, #1
//Adjust the data sequence for setting the luma MB
vmov d3, d1
vtrn.8 d0, d1
vext.u8 d2, d1, d1, #6
// Each vst2.16 writes one 16-bit lane from each of two registers: 4 bytes per row.
vst2.16 {d2[3], d3[3]}, [r0], r1
vst2.16 {d0[2], d1[2]}, [r0], r1
vmov d3, d0
vst2.16 {d2[2], d3[2]}, [r0], r1
vst2.16 {d0[1], d1[1]}, [r0]
WELS_ASM_FUNC_END
// 8x8 chroma vertical prediction: replicates the 8-byte row directly above the
// block into all 8 rows of the block.
//   r0 = top-left of the block to fill, r1 = row stride in bytes
//   (as used by the addressing below; the C prototype is not visible here).
WELS_ASM_FUNC_BEGIN WelsDecoderIChromaPredV_neon
//stmdb sp!, { r2-r5, lr}
//Get the top row (8 byte)
sub r2, r0, r1
vldr d0, [r2]
//Set the chroma MB using top row data
vst1.8 {d0}, [r0], r1
vst1.8 {d0}, [r0], r1
vst1.8 {d0}, [r0], r1
vst1.8 {d0}, [r0], r1
vst1.8 {d0}, [r0], r1
vst1.8 {d0}, [r0], r1
vst1.8 {d0}, [r0], r1
vst1.8 {d0}, [r0]
WELS_ASM_FUNC_END
// 8x8 chroma horizontal prediction: fills each of the 8 rows with the byte
// immediately to the left of that row.
//   r0 = top-left of the block to fill, r1 = row stride in bytes
//   (as used by the addressing below; the C prototype is not visible here).
WELS_ASM_FUNC_BEGIN WelsDecoderIChromaPredH_neon
//stmdb sp!, { r2-r5, lr}
//Get the left column (8 byte)
// Each load reads one byte and replicates it to all 8 lanes of a D register.
sub r2, r0, #1
vld1.8 {d0[]}, [r2], r1
vld1.8 {d1[]}, [r2], r1
vld1.8 {d2[]}, [r2], r1
vld1.8 {d3[]}, [r2], r1
vld1.8 {d4[]}, [r2], r1
vld1.8 {d5[]}, [r2], r1
vld1.8 {d6[]}, [r2], r1
vld1.8 {d7[]}, [r2]
//Set the chroma MB using left column data
vst1.8 {d0}, [r0], r1
vst1.8 {d1}, [r0], r1
vst1.8 {d2}, [r0], r1
vst1.8 {d3}, [r0], r1
vst1.8 {d4}, [r0], r1
vst1.8 {d5}, [r0], r1
vst1.8 {d6}, [r0], r1
vst1.8 {d7}, [r0]
WELS_ASM_FUNC_END
// 8x8 chroma DC prediction with both neighbours available. Each 4x4 quadrant
// gets its own DC value (L = left column bytes, T = top row bytes):
//   top-left     = (sum(L0..L3) + sum(T0..T3) + 4) >> 3
//   top-right    = (sum(T4..T7) + 2) >> 2
//   bottom-left  = (sum(L4..L7) + 2) >> 2
//   bottom-right = (sum(L4..L7) + sum(T4..T7) + 4) >> 3
//   r0 = top-left of the block to fill, r1 = row stride in bytes
//   (as used by the addressing below; the C prototype is not visible here).
WELS_ASM_FUNC_BEGIN WelsDecoderIChromaPredDc_neon
//stmdb sp!, { r2-r5, lr}
//Load the left column data (8 bytes)
sub r2, r0, #1
GET_8BYTE_DATA d0, r2, r1
//Load the top row data (8 bytes)
sub r2, r0, r1
vldr d1, [r2]
//Calculate the sum of left column and top row
// After the two pairwise adds each D register holds two 32-bit sums of 4 bytes:
// d0 = {L0-3, L4-7}, d1 = {T0-3, T4-7}.
vpaddl.u8 q0, q0
vpaddl.u16 q0, q0
vadd.u32 d2, d0, d1 //'m1' save to d2
vrshr.u32 q0, q0, #2 //calculate 'm2','m3'
vrshr.u32 d2, d2, #3 //calculate 'm4'
//duplicate the 'mx' to a vector line
// Byte 0 / byte 4 select the low byte of the first / second 32-bit lane.
vdup.8 d4, d2[0]
vdup.8 d5, d1[4]
vdup.8 d6, d0[4]
vdup.8 d7, d2[4]
//Set the chroma MB
// Each vst2.32 writes 4 bytes from the first register then 4 from the second,
// i.e. the left and right quadrant values of one row.
vst2.32 {d4[0],d5[0]}, [r0], r1
vst2.32 {d4[0],d5[0]}, [r0], r1
vst2.32 {d4[0],d5[0]}, [r0], r1
vst2.32 {d4[0],d5[0]}, [r0], r1
vst2.32 {d6[0],d7[0]}, [r0], r1
vst2.32 {d6[0],d7[0]}, [r0], r1
vst2.32 {d6[0],d7[0]}, [r0], r1
vst2.32 {d6[0],d7[0]}, [r0]
WELS_ASM_FUNC_END
//Table {{1,2,3,4,1,2,3,4}*17} stored as bytes: 0x11,0x22,0x33,0x44 = 17,34,51,68
CONST0_GET_I_CHROMA_PRED_PLANE: .long 0x44332211, 0x44332211//0x140f0a05, 0x28231e19
//Table {-3,-2,-1,0,1,2,3,4} stored as eight signed 16-bit values
CONST1_GET_I_CHROMA_PRED_PLANE: .long 0xfffefffd, 0x0000ffff,0x00020001,0x00040003
// Chroma 8x8 intra prediction, plane mode (ARM32 NEON).
// With T* = bytes of the row above, L* = bytes of the column to the left and
// LT = the top-left corner byte, the code computes
//   a = 16 * (T7 + L7)
//   b = (17 * H + 16) >> 5,  H = 1*(T4-T2) + 2*(T5-T1) + 3*(T6-T0) + 4*(T7-LT)
//   c = (17 * V + 16) >> 5,  V = same weighting applied to the left column
//   pred[y][x] = clip_u8((a + b*(x-3) + c*(y-3) + 16) >> 5)
//   r0: address of the first row to write (advanced by r1 after each row)
//   r1: byte distance between consecutive rows
// Scratch: r2, q0-q3.
WELS_ASM_FUNC_BEGIN WelsDecoderIChromaPredPlane_neon
//stmdb sp!, { r2-r5, lr}
//Load the top row data; r2 first points at the top-left corner (LT)
sub r2, r0, #1
sub r2, r1
vld1.32 {d1[0]}, [r2]
add r2, #5
vld1.32 {d0[0]}, [r2]
//Load the left column data, starting again from LT
sub r2, #5
vld1.8 {d1[4]}, [r2], r1
vld1.8 {d1[5]}, [r2], r1
vld1.8 {d1[6]}, [r2], r1
vld1.8 {d1[7]}, [r2], r1 //d1:{LT,T0,T1,T2,LT,L0,L1,L2}
add r2, r1 //skip L3, which is not used by the formula
vld1.8 {d0[4]}, [r2], r1
vld1.8 {d0[5]}, [r2], r1
vld1.8 {d0[6]}, [r2], r1
vld1.8 {d0[7]}, [r2] //d0:{T4,T5,T6,T7,L4,L5,L6,L7}
//Save T7 to d3 for next step
vdup.u8 d3, d0[3]
//Save L7 to d4 for next step
vdup.u8 d4, d0[7]
//Calculate the value of 'a' = 16*(T7+L7) and save to q2 (overwrites d4)
vaddl.u8 q2, d3, d4
vshl.u16 q2, #4
//Load the table {{1,2,3,4,1,2,3,4}*17}
adr r2, CONST0_GET_I_CHROMA_PRED_PLANE
vld1.32 {d2}, [r2]
//Calculate the 'b','c', and save to q0
vrev32.8 d1, d1 //d1 becomes {T2,T1,T0,LT,L2,L1,L0,LT} to pair with d0
vsubl.u8 q0, d0, d1
vmovl.u8 q1, d2
vmul.s16 q0, q1
vpaddl.s16 q0, q0
vpaddl.s32 q0, q0 //d0 = 17*H, d1 = 17*V as 64-bit sums
vrshr.s64 q0, #5
//Load the table {-3,-2,-1,0,1,2,3,4} to q3
adr r2, CONST1_GET_I_CHROMA_PRED_PLANE
vld1.32 {d6, d7}, [r2]
//Duplicate the 'b','c' to q0, q1 for SIMD instruction (q1 = c, q0 = b)
vdup.s16 q1, d1[0]
vdup.s16 q0, d0[0]
//Calculate the "(a + b * (j - 3) + c * (- 3) + 16) >> 5;"
vmla.s16 q2, q0, q3
vmla.s16 q2, q1, d6[0]
vqrshrun.s16 d0, q2, #5
//Set a line of chroma MB
vst1.u32 {d0}, [r0], r1
//Do the same processing for each remaining line: add 'c' once per row
mov r2, #7
loop_0_get_i_chroma_pred_plane:
vadd.s16 q2, q1
vqrshrun.s16 d0, q2, #5
vst1.u32 {d0}, [r0], r1
subs r2, #1
bne loop_0_get_i_chroma_pred_plane
WELS_ASM_FUNC_END
#endif

View File

@ -0,0 +1,131 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifdef HAVE_NEON_AARCH64
#include "arm_arch64_common_macro.S"
// First butterfly stage of the row pass of the 4x4 inverse transform.
// Inputs are four vectors of 16-bit lanes; outputs are widened to 32-bit lanes.
//   arg0-arg3: inputs src[0]..src[3] (.4h)
//   arg4-arg7: outputs e[0]..e[3] (.4s)
//   arg8, arg9: scratch registers holding src[1]>>1 and src[3]>>1
.macro ROW_TRANSFORM_1_STEP arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9
// { // input: src_d[0]~[3], output: e_q[0]~[3]; working: \arg8\() \arg9\()
saddl \arg4\().4s, \arg0\().4h, \arg2\().4h //int32 e[i][0] = src[0] + src[2];
ssubl \arg5\().4s, \arg0\().4h, \arg2\().4h //int32 e[i][1] = src[0] - src[2];
sshr \arg8\().4h, \arg1\().4h, #1
sshr \arg9\().4h, \arg3\().4h, #1
ssubl \arg6\().4s, \arg8\().4h, \arg3\().4h //int32 e[i][2] = (src[1]>>1)-src[3];
saddl \arg7\().4s, \arg1\().4h, \arg9\().4h //int32 e[i][3] = src[1] + (src[3]>>1);
// }
.endm
// Second butterfly stage of the 4x4 inverse transform, shared by the row and
// the column pass. All operands use 32-bit lanes (.4s).
//   arg0-arg3: outputs f[0]..f[3]
//   arg4-arg7: inputs  e[0]..e[3]
.macro TRANSFORM_4BYTES arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7
// both row & col transform used
// { // output: f_q[0]~[3], input: e_q[0]~[3];
add \arg0\().4s, \arg4\().4s, \arg7\().4s //int32 f[i][0] = e[i][0] + e[i][3];
add \arg1\().4s, \arg5\().4s, \arg6\().4s //int32 f[i][1] = e[i][1] + e[i][2];
sub \arg2\().4s, \arg5\().4s, \arg6\().4s //int32 f[i][2] = e[i][1] - e[i][2];
sub \arg3\().4s, \arg4\().4s, \arg7\().4s //int32 f[i][3] = e[i][0] - e[i][3];
// }
.endm
// First butterfly stage of the column pass of the 4x4 inverse transform.
// Same arithmetic as ROW_TRANSFORM_1_STEP but inputs are already 32-bit lanes,
// and the output registers arg6/arg7 double as the scratch for the >>1 terms.
//   arg0-arg3: inputs  f[0]..f[3] (.4s)
//   arg4-arg7: outputs e[0]..e[3] (.4s)
.macro COL_TRANSFORM_1_STEP arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7
// { // input: src_q[0]~[3], output: e_q[0]~[3];
add \arg4\().4s, \arg0\().4s, \arg2\().4s //int32 e[0][j] = f[0][j] + f[2][j];
sub \arg5\().4s, \arg0\().4s, \arg2\().4s //int32 e[1][j] = f[0][j] - f[2][j];
sshr \arg6\().4s, \arg1\().4s, #1
sshr \arg7\().4s, \arg3\().4s, #1
sub \arg6\().4s, \arg6\().4s, \arg3\().4s //int32 e[2][j] = (f[1][j]>>1) - f[3][j];
add \arg7\().4s, \arg1\().4s, \arg7\().4s //int32 e[3][j] = f[1][j] + (f[3][j]>>1);
// }
.endm
// uint8_t *pred, const int32_t stride, int16_t *rs
// 4x4 inverse transform of the 16 coefficients at rs, followed by adding the
// result (rounded, >> 6) to the 4x4 block of bytes at pred and storing the
// sum back to pred, saturated to [0, 255].
//   x0: pred   (read, then overwritten in place)
//   w1: stride (byte distance between rows of pred)
//   x2: rs     (16 int16 values; reused as the store pointer after loading)
// NOTE(review): SIGN_EXTENSION is a macro defined outside this view; it is
// assumed to sign-extend w1 into x1 - confirm.
WELS_ASM_AARCH64_FUNC_BEGIN IdctResAddPred_AArch64_neon
SIGN_EXTENSION x1,w1
// de-interleaving load: v0 gets elements 0,4,8,12; v1 gets 1,5,9,13; etc.
ld4 {v0.4h, v1.4h, v2.4h, v3.4h}, [x2] // cost 3 cycles!
ROW_TRANSFORM_1_STEP v0, v1, v2, v3, v16, v17, v18, v19, v4, v5
TRANSFORM_4BYTES v0, v1, v2, v3, v16, v17, v18, v19
// transpose the 4x4 matrix of 32-bit elements
trn1 v16.4s, v0.4s, v1.4s //[0 1 2 3]+[4 5 6 7]-->[0 4 2 6]
trn2 v17.4s, v0.4s, v1.4s //[0 1 2 3]+[4 5 6 7]-->[1 5 3 7]
trn1 v18.4s, v2.4s, v3.4s //[8 9 10 11]+[12 13 14 15]-->[8 12 10 14]
trn2 v19.4s, v2.4s, v3.4s //[8 9 10 11]+[12 13 14 15]-->[9 13 11 15]
trn1 v0.2d, v16.2d, v18.2d //[0 4 2 6]+[8 12 10 14]-->[0 4 8 12]
trn2 v2.2d, v16.2d, v18.2d //[0 4 2 6]+[8 12 10 14]-->[2 6 10 14]
trn1 v1.2d, v17.2d, v19.2d //[1 5 3 7]+[9 13 11 15]-->[1 5 9 13]
trn2 v3.2d, v17.2d, v19.2d //[1 5 3 7]+[9 13 11 15]-->[3 7 11 15]
COL_TRANSFORM_1_STEP v0, v1, v2, v3, v16, v17, v18, v19
TRANSFORM_4BYTES v0, v1, v2, v3, v16, v17, v18, v19
// add the residual to the prediction and clip the sum into [0, 255]
mov x2, x0 // keep the original pred pointer for the stores below
// load the four 4-byte prediction rows: rows 0,1 into v16, rows 2,3 into v17
ld1 {v16.s}[0],[x0],x1
ld1 {v16.s}[1],[x0],x1
ld1 {v17.s}[0],[x0],x1
ld1 {v17.s}[1],[x0]
// residual = (f + 32) >> 6, narrowed to 16 bits; v0 = rows 0,1 and v1 = rows 2,3
rshrn v0.4h, v0.4s, #6
rshrn2 v0.8h, v1.4s, #6
rshrn v1.4h, v2.4s, #6
rshrn2 v1.8h, v3.4s, #6
// widen prediction bytes to 16 bits, add residual, saturate back to unsigned bytes
uxtl v2.8h,v16.8b
uxtl v3.8h,v17.8b
add v2.8h, v2.8h, v0.8h
add v3.8h, v3.8h, v1.8h
sqxtun v0.8b,v2.8h
sqxtun v1.8b,v3.8h
st1 {v0.s}[0],[x2],x1
st1 {v0.s}[1],[x2],x1
st1 {v1.s}[0],[x2],x1
st1 {v1.s}[1],[x2]
WELS_ASM_AARCH64_FUNC_END
// Writes zeros to 16 rows of 32 bytes each, starting at x0.
//   x0: destination address (advanced after each row)
//   w1: row stride; it is doubled before use, so it is presumably expressed
//       in 16-bit elements rather than bytes - NOTE(review): confirm with callers
// NOTE(review): SIGN_EXTENSION is a macro defined outside this view; it is
// assumed to sign-extend w1 into x1 - confirm.
WELS_ASM_AARCH64_FUNC_BEGIN WelsBlockZero16x16_AArch64_neon
eor v0.16b, v0.16b, v0.16b
eor v1.16b, v1.16b, v1.16b
SIGN_EXTENSION x1,w1
lsl x1, x1, 1
.rept 16
st1 {v0.16b, v1.16b}, [x0], x1
.endr
WELS_ASM_AARCH64_FUNC_END
// Writes zeros to 8 rows of 16 bytes each, starting at x0.
//   x0: destination address (advanced after each row)
//   w1: row stride; it is doubled before use, so it is presumably expressed
//       in 16-bit elements rather than bytes - NOTE(review): confirm with callers
// NOTE(review): SIGN_EXTENSION is a macro defined outside this view; it is
// assumed to sign-extend w1 into x1 - confirm.
WELS_ASM_AARCH64_FUNC_BEGIN WelsBlockZero8x8_AArch64_neon
eor v0.16b, v0.16b, v0.16b
SIGN_EXTENSION x1, w1
lsl x1, x1, 1
.rept 8
st1 {v0.16b}, [x0], x1
.endr
WELS_ASM_AARCH64_FUNC_END
#endif

View File

@ -0,0 +1,524 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifdef HAVE_NEON_AARCH64
#include "arm_arch64_common_macro.S"
// for Luma 4x4
// Luma 4x4 intra prediction, horizontal mode: each of the 4 rows is filled
// with the byte immediately to its left.
//   x0: address of the first row to write (advanced by the stride per row)
//   w1: row stride in bytes (sign-extended into x1)
WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI4x4LumaPredH_AArch64_neon
sxtw x1, w1
sub x2, x0, #1
.rept 4
ld1r {v0.8b}, [x2], x1
st1 {v0.S}[0], [x0], x1
.endr
WELS_ASM_AARCH64_FUNC_END
// Luma 4x4 intra prediction, DC mode using both neighbours: every pixel is
// set to (sum of the 4 bytes above + sum of the 4 bytes to the left + 4) >> 3.
//   x0: address of the first row to write (advanced by the stride per row)
//   w1: row stride in bytes (sign-extended into x1)
WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI4x4LumaPredDc_AArch64_neon
sxtw x1, w1
sub x2, x0, x1
sub x3, x0, #1
// bytes 0-3 of v0 = top row, bytes 4-7 = left column
ldr s0, [x2]
ld1 {v0.b}[4], [x3], x1
ld1 {v0.b}[5], [x3], x1
ld1 {v0.b}[6], [x3], x1
ld1 {v0.b}[7], [x3]
uaddlv h0, v0.8b
uqrshrn b0, h0, #3
dup v0.8b, v0.b[0]
.rept 4
st1 {v0.S}[0], [x0], x1
.endr
WELS_ASM_AARCH64_FUNC_END
// Luma 4x4 intra prediction, DC mode using only the top neighbours: every
// pixel is set to (sum of the 4 bytes above + 2) >> 2.
//   x0: address of the first row to write (advanced by the stride per row)
//   w1: row stride in bytes (sign-extended into x1)
WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI4x4LumaPredDcTop_AArch64_neon
sxtw x1, w1
sub x2, x0, x1
sub v0.8b, v0.8b, v0.8b // clear v0 before the 4-byte load
ldr s0, [x2]
uaddlv h0, v0.8b
uqrshrn v0.8b, v0.8h, #2
dup v0.8b, v0.b[0]
.rept 4
st1 {v0.S}[0], [x0], x1
.endr
WELS_ASM_AARCH64_FUNC_END
// Luma 4x4 intra prediction, diagonal-down-left mode.
// Reads the 8 bytes t0..t7 above the block and computes the 3-tap filter
//   f[i] = (t[i] + 2*t[i+1] + t[i+2] + 2) >> 2   (with t[8] = t[9] = t7),
// then writes row r as f[r..r+3].
//   x0: address of the first row to write (advanced by the stride per row)
//   w1: row stride in bytes (sign-extended into x1)
WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI4x4LumaPredDDL_AArch64_neon
sxtw x1, w1
sub x2, x0, x1
ld1 {v0.8b}, [x2]
dup v1.8b, v0.b[7] // padding: t7 replicated
ext v2.8b, v0.8b, v1.8b, #1 // t[i+1]
ext v3.8b, v0.8b, v1.8b, #2 // t[i+2]
ushll v2.8h, v2.8b, #1 // 2*t[i+1]
uaddl v1.8h, v3.8b, v0.8b // t[i] + t[i+2]
add v1.8h, v1.8h, v2.8h
uqrshrn v1.8b, v1.8h, #2 // v1.8b = f[0..7]
st1 {v1.S}[0], [x0], x1
// the following ext instructions shift f left by 1, 2, 3 bytes; only the low
// 4 bytes of each result are stored, so the bytes taken from v2 are never used
ext v0.8b, v1.8b, v2.8b, #1
st1 {v0.S}[0], [x0], x1
ext v0.8b, v1.8b, v2.8b, #2
st1 {v0.S}[0], [x0], x1
ext v0.8b, v1.8b, v2.8b, #3
st1 {v0.S}[0], [x0]
WELS_ASM_AARCH64_FUNC_END
// Luma 4x4 intra prediction, diagonal-down-left mode, variant that does not
// use the top-right bytes: bytes 4-7 of the loaded top row are replaced by
// copies of t3 before the same 3-tap filter as the DDL routine is applied.
//   x0: address of the first row to write (advanced by the stride per row)
//   w1: row stride in bytes (sign-extended into x1)
WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI4x4LumaPredDDLTop_AArch64_neon
sxtw x1, w1
sub x2, x0, x1
ld1 {v0.8b}, [x2]
dup v1.8b, v0.b[3] // t3 replicated
mov v0.S[1], v1.S[0] // overwrite t4..t7 with t3
ext v2.8b, v0.8b, v1.8b, #1 // t[i+1]
ext v3.8b, v0.8b, v1.8b, #2 // t[i+2]
ushll v2.8h, v2.8b, #1 // 2*t[i+1]
uaddl v1.8h, v3.8b, v0.8b // t[i] + t[i+2]
add v1.8h, v1.8h, v2.8h
uqrshrn v1.8b, v1.8h, #2 // v1.8b = filtered values f[0..7]
st1 {v1.S}[0], [x0], x1
// rows 1-3 are f shifted left by 1, 2, 3 bytes; only the low 4 bytes are stored
ext v0.8b, v1.8b, v2.8b, #1
st1 {v0.S}[0], [x0], x1
ext v0.8b, v1.8b, v2.8b, #2
st1 {v0.S}[0], [x0], x1
ext v0.8b, v1.8b, v2.8b, #3
st1 {v0.S}[0], [x0]
WELS_ASM_AARCH64_FUNC_END
// Luma 4x4 intra prediction, vertical-left mode.
// From the 8 bytes t0..t7 above the block it computes
//   avg2[i] = (t[i] + t[i+1] + 1) >> 1
//   avg3[i] = (t[i] + 2*t[i+1] + t[i+2] + 2) >> 2
// and writes rows avg2[0..3], avg3[0..3], avg2[1..4], avg3[1..4].
//   x0: address of the first row to write (advanced by the stride per row)
//   w1: row stride in bytes (sign-extended into x1)
WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI4x4LumaPredVL_AArch64_neon
sxtw x1, w1
sub x2, x0, x1
ld1 {v0.8b}, [x2]
ext v1.8b, v0.8b, v0.8b, #1
uaddl v1.8h, v1.8b, v0.8b // t[i] + t[i+1]
uqrshrn v0.8b, v1.8h, #1 // v0.8b is VL0, VL1, VL2, VL3, VL4, ...
ext v2.16b, v1.16b, v1.16b, #2 // pair sums shifted by one 16-bit lane
add v1.8h, v2.8h, v1.8h // t[i] + 2*t[i+1] + t[i+2]
uqrshrn v1.8b, v1.8h, #2 // v1.8b is VL5, VL6, VL7, VL8, VL9
st1 {v0.s}[0], [x0], x1 // write the first row
st1 {v1.s}[0], [x0], x1 // write the second row
ext v3.8b, v0.8b, v0.8b, #1
ext v2.8b, v1.8b, v1.8b, #1
st1 {v3.s}[0], [x0], x1 // write the third row
st1 {v2.s}[0], [x0] // write the fourth row
WELS_ASM_AARCH64_FUNC_END
// Luma 4x4 intra prediction, vertical-left mode, variant that does not use
// the top-right bytes: bytes 4-7 of the loaded top row are replaced by copies
// of t3, after which the computation is the same as the VL routine.
//   x0: address of the first row to write (advanced by the stride per row)
//   w1: row stride in bytes (sign-extended into x1)
WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI4x4LumaPredVLTop_AArch64_neon
sxtw x1, w1
sub x2, x0, x1
ld1 {v0.8b}, [x2]
dup v1.8b, v0.b[3] // t3 replicated
mov v0.s[1], v1.s[0] // overwrite t4..t7 with t3
ext v1.8b, v0.8b, v0.8b, #1
uaddl v1.8h, v1.8b, v0.8b // t[i] + t[i+1]
uqrshrn v0.8b, v1.8h, #1 // v0.8b is VL0, VL1, VL2, VL3, VL4, ...
ext v2.16b, v1.16b, v1.16b, #2 // pair sums shifted by one 16-bit lane
add v1.8h, v2.8h, v1.8h // t[i] + 2*t[i+1] + t[i+2]
uqrshrn v1.8b, v1.8h, #2 // v1.8b is VL5, VL6, VL7, VL8, VL9
st1 {v0.s}[0], [x0], x1 // write the first row
st1 {v1.s}[0], [x0], x1 // write the second row
ext v3.8b, v0.8b, v0.8b, #1
ext v2.8b, v1.8b, v1.8b, #1
st1 {v3.s}[0], [x0], x1 // write the third row
st1 {v2.s}[0], [x0] // write the fourth row
WELS_ASM_AARCH64_FUNC_END
// Luma 4x4 intra prediction, vertical-right mode.
// Gathers {l2,l1,l0,lt,t0,t1,t2,t3} (left column bottom-up, top-left corner,
// top row) into one vector, computes the 2-tap and 3-tap rounded averages of
// adjacent entries and assembles the four output rows from them.
//   x0: address of the first row to write (advanced by the stride per row,
//       including after the last row)
//   w1: row stride in bytes (sign-extended into x1)
WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI4x4LumaPredVR_AArch64_neon
sxtw x1, w1
sub x2, x0, x1
ld1 {v0.s}[1], [x2] // t0..t3 into bytes 4-7
sub x2, x2, #1 // x2 now points at the top-left corner
ld1 {v0.b}[3], [x2], x1
ld1 {v0.b}[2], [x2], x1
ld1 {v0.b}[1], [x2], x1
ld1 {v0.b}[0], [x2] // v0.8b l2, l1, l0, lt, t0, t1, t2, t3
ext v1.8b, v0.8b, v0.8b, #7
uaddl v2.8h, v1.8b, v0.8b //v2:{X,L2+L1,L1+L0,L0+LT,LT+T0,T0+T1,T1+T2,T2+T3}
ext v1.16b, v2.16b, v2.16b, #14
add v3.8h, v2.8h, v1.8h //v3:{X,X,L2+L1+L1+L0,L1+L0+L0+LT,...T1+T2+T2+T3}
uqrshrn v3.8b, v3.8h, #2 // 3-tap filtered values
uqrshrn v2.8b, v2.8h, #1 // 2-tap averages
st1 {v2.s}[1], [x0], x1 // row 0: 2-tap averages of lt..t3
st1 {v3.s}[1], [x0], x1 // row 1: 3-tap values of l0..t3
// row 2 = row 0 shifted right by one pixel with the 3-tap value of (l1,l0,lt) in front
ext v2.8b, v2.8b, v2.8b, #7
ins v2.b[4], v3.b[3]
st1 {v2.s}[1], [x0], x1
// row 3 = row 1 shifted right by one pixel with the 3-tap value of (l2,l1,l0) in front
ext v3.8b, v3.8b, v3.8b, #7
ins v3.b[4], v3.b[3]
st1 {v3.s}[1], [x0], x1
WELS_ASM_AARCH64_FUNC_END
// Luma 4x4 intra prediction, horizontal-up mode.
// Uses only the 4 bytes L0..L3 to the left of the block. Produces the
// sequence HU0..HU5 of alternating 2-tap and 3-tap rounded averages followed
// by L3, and writes rows {HU0..HU3}, {HU2..HU5}, {HU4,HU5,L3,L3}, {L3 x4}.
//   x0: address of the first row to write (advanced by the stride per row)
//   w1: row stride in bytes (sign-extended into x1)
WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI4x4LumaPredHU_AArch64_neon
sxtw x1, w1
sub x2, x0, #1
// x3 = address of L3 (three rows below L0)
mov x3, #3
mul x3, x3, x1
add x3, x3, x2
ld1r {v0.8b}, [x3]
ld1 {v0.b}[4], [x2], x1
ld1 {v0.b}[5], [x2], x1
ld1 {v0.b}[6], [x2], x1 //v0:{L3,L3,L3,L3,L0,L1,L2,L3}
ext v1.8b, v0.8b, v0.8b, #1
uaddl v2.8h, v0.8b, v1.8b //v2:{L3+L3,L3+L3,L3+L3,L3+L0,L0+L1,L1+L2,L2+L3,L3+L3}
ext v3.16b, v2.16b, v2.16b, #2
add v3.8h, v3.8h, v2.8h //v3 lanes 4-6 hold the unrounded sums for HU1, HU3, HU5
uqrshrn v2.8b, v2.8h, #1 // HU0, HU2, HU4 (in bytes 4-6)
uqrshrn v3.8b, v3.8h, #2 // HU1, HU3, HU5 (in bytes 4-6)
zip2 v3.8b, v2.8b, v3.8b // HU0, HU1, HU2, HU3, HU4, HU5
mov v3.h[3], v0.h[0] // v3.8b is hu0, hu1, hu2, hu3, hu4, hu5, l3, l3
ext v2.8b, v3.8b, v0.8b, #2 // v2.8b is hu2, hu3, hu4, hu5, l3, l3, l3, l3
st1 {v3.s}[0], [x0], x1
st1 {v2.s}[0], [x0], x1
st1 {v3.s}[1], [x0], x1
st1 {v0.s}[0], [x0]
WELS_ASM_AARCH64_FUNC_END
// Luma 4x4 intra prediction, horizontal-down mode.
// Gathers {l3,l2,l1,l0,lt,t0,t1,t2} (left column bottom-up, top-left corner,
// first three top bytes), computes 2-tap and 3-tap rounded averages of
// adjacent entries, interleaves them and extracts the four output rows.
//   x0: address of the first row to write (advanced by the stride per row)
//   w1: row stride in bytes (sign-extended into x1)
WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI4x4LumaPredHD_AArch64_neon
sxtw x1, w1
sub x2, x0, #1
sub x2, x2, x1 // x2 points to top left
ld1 {v0.s}[1], [x2], x1 // lt, t0, t1, t2 into bytes 4-7; x2 moves down to l0
ld1 {v0.b}[3], [x2], x1
ld1 {v0.b}[2], [x2], x1
ld1 {v0.b}[1], [x2], x1
ld1 {v0.b}[0], [x2] // v0.8b: l3, l2, l1, l0, lt, t0, t1, t2
ext v1.8b, v0.8b, v0.8b, #1 // v1.8b: l2, l1, l0, lt, t0, t1, t2, l3
uaddl v2.8h, v0.8b, v1.8b
ext v3.16b, v2.16b, v2.16b, #2
add v3.8h, v3.8h, v2.8h
uqrshrn v2.8b, v2.8h, #1 // hd8, hd6, hd4, hd0, xxx
uqrshrn v3.8b, v3.8h, #2 // hd9, hd7, hd5, hd1, hd2, hd3
zip1 v2.8b, v2.8b, v3.8b // hd8, hd9, hd6, hd7, hd4, hd5, hd0, hd1
mov v1.h[0], v3.h[2] // v1 bytes 0-1 = hd2, hd3
ext v3.8b, v2.8b, v1.8b, #6 // hd0, hd1, hd2, hd3, ...
st1 {v3.s}[0], [x0], x1 // row 0: hd0..hd3
st1 {v2.s}[1], [x0], x1 // row 1: hd4, hd5, hd0, hd1
ext v3.8b, v2.8b, v1.8b, #2
st1 {v3.s}[0], [x0], x1 // row 2: hd6, hd7, hd4, hd5
st1 {v2.s}[0], [x0] // row 3: hd8, hd9, hd6, hd7
WELS_ASM_AARCH64_FUNC_END
// for Chroma 8x8
// Chroma 8x8 intra prediction, vertical mode: the 8 bytes one stride above
// x0 are copied into each of the 8 rows starting at x0.
//   x0: address of the first row to write (advanced by the stride per row)
//   w1: row stride in bytes (sign-extended into x1)
WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderIChromaPredV_AArch64_neon
sxtw x1, w1
sub x2, x0, x1
ld1 {v0.8b}, [x2]
.rept 8
st1 {v0.8b}, [x0], x1
.endr
WELS_ASM_AARCH64_FUNC_END
// Chroma 8x8 intra prediction, horizontal mode: each of the 8 rows is filled
// with the byte immediately to its left.
//   x0: address of the first row to write (advanced by the stride per row)
//   w1: row stride in bytes (sign-extended into x1)
WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderIChromaPredH_AArch64_neon
sxtw x1, w1
sub x2, x0, #1
.rept 8
ld1r {v0.8b}, [x2], x1
st1 {v0.8b}, [x0], x1
.endr
WELS_ASM_AARCH64_FUNC_END
// Chroma 8x8 intra prediction, DC mode with both top and left neighbours.
// Each 4x4 quadrant is filled with one value:
//   top-left     : (sum(top[0..3]) + sum(left[0..3]) + 4) >> 3
//   top-right    : (sum(top[4..7]) + 2) >> 2
//   bottom-left  : (sum(left[4..7]) + 2) >> 2
//   bottom-right : (sum(top[4..7]) + sum(left[4..7]) + 4) >> 3
//   x0: address of the first row to write (advanced by the stride per row)
//   w1: row stride in bytes (sign-extended into x1)
WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderIChromaPredDc_AArch64_neon
sxtw x1, w1
sub x2, x0, x1
sub x3, x0, #1
// v0 bytes 0-7 = top row, bytes 8-15 = left column
ld1 {v0.8b}, [x2]
ld1 {v0.b}[8], [x3], x1
ld1 {v0.b}[9], [x3], x1
ld1 {v0.b}[10], [x3], x1
ld1 {v0.b}[11], [x3], x1
ld1 {v0.b}[12], [x3], x1
ld1 {v0.b}[13], [x3], x1
ld1 {v0.b}[14], [x3], x1
ld1 {v0.b}[15], [x3]
uaddlp v1.8h, v0.16b
uaddlp v2.4s, v1.8h // v2 = {sum top[0..3], sum top[4..7], sum left[0..3], sum left[4..7]}
ins v3.d[0], v2.d[1]
add v3.2s, v2.2s, v3.2s // v3 = {top[0..3]+left[0..3], top[4..7]+left[4..7]}
urshr v2.4s, v2.4s, #2
urshr v3.2s, v3.2s, #3
dup v0.8b, v3.b[0] // top-left value
dup v1.8b, v2.b[4] // top-right value
dup v2.8b, v2.b[12] // bottom-left value
dup v3.8b, v3.b[4] // bottom-right value
ins v0.s[1], v1.s[0] // v0.8b = upper-half row: 4 x top-left, 4 x top-right
ins v2.s[1], v3.s[0] // v2.8b = lower-half row: 4 x bottom-left, 4 x bottom-right
.rept 4
st1 {v0.8b}, [x0], x1
.endr
.rept 4
st1 {v2.8b}, [x0], x1
.endr
WELS_ASM_AARCH64_FUNC_END
// Chroma 8x8 intra prediction, DC mode using only the top neighbours.
// Columns 0-3 of every row get (sum(top[0..3]) + 2) >> 2 and columns 4-7 get
// (sum(top[4..7]) + 2) >> 2.
//   x0: address of the first row to write (advanced by the stride per row)
//   w1: row stride in bytes (sign-extended into x1)
WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderIChromaPredDcTop_AArch64_neon
sxtw x1, w1
sub x2, x0, x1
ld1 {v0.8b}, [x2]
uaddlp v0.4h, v0.8b
addp v0.8h, v0.8h, v0.8h // v0.h[0] = sum top[0..3], v0.h[1] = sum top[4..7]
dup v1.8h, v0.h[0]
dup v2.8h, v0.h[1]
mov v1.D[1], v2.D[0] // v1.8h = 4 x left-half sum, 4 x right-half sum
uqrshrn v1.8b, v1.8h, #2
.rept 8
st1 {v1.8b}, [x0], x1
.endr
WELS_ASM_AARCH64_FUNC_END
.align 4
// Weights 17*{1,2,3,4} for the horizontal (lanes 0-3) and vertical (lanes 4-7) gradients
intra_1_to_4: .short 17*1, 17*2, 17*3, 17*4, 17*1, 17*2, 17*3, 17*4
// Column offsets (x - 3) for x = 0..7; lane 0 (-3) is also used as the first row offset
intra_m3_to_p4: .short -3, -2, -1, 0, 1, 2, 3, 4
// Chroma 8x8 intra prediction, plane mode.
// With T* = bytes of the row above, L* = bytes of the column to the left and
// LT = the top-left corner byte, the code computes
//   a = 16 * (T7 + L7)
//   b = (17 * H + 16) >> 5,  H = 1*(T4-T2) + 2*(T5-T1) + 3*(T6-T0) + 4*(T7-LT)
//   c = (17 * V + 16) >> 5,  V = same weighting applied to the left column
//   pred[y][x] = clip_u8((a + b*(x-3) + c*(y-3) + 16) >> 5)
//   x0: address of the first row to write (advanced by the stride per row)
//   w1: row stride in bytes (sign-extended into x1)
WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderIChromaPredPlane_AArch64_neon
sxtw x1, w1
sub x2, x0, x1
sub x2, x2, #1 // x2 = address of the top-left corner, walks along the top row
mov x3, x2 // x3 = same start, walks down the left column
// load pTop[2-i] and pLeft[(2-i)*kiStride]
ld1 {v1.b}[3], [x2], #1
ld1 {v1.b}[2], [x2], #1
ld1 {v1.b}[1], [x2], #1
ld1 {v1.b}[0], [x2], #1
ld1 {v1.b}[7], [x3], x1
ld1 {v1.b}[6], [x3], x1
ld1 {v1.b}[5], [x3], x1
ld1 {v1.b}[4], [x3], x1
// skip T3 and L3, which the formula does not use
add x2, x2, #1
add x3, x3, x1
// load pTop[4+i] and pLeft[(4+i)*kiStride]
ld1 {v0.b}[0], [x2], #1
ld1 {v0.b}[1], [x2], #1
ld1 {v0.b}[2], [x2], #1
ld1 {v0.b}[3], [x2], #1
ld1 {v0.b}[4], [x3], x1
ld1 {v0.b}[5], [x3], x1
ld1 {v0.b}[6], [x3], x1
ld1 {v0.b}[7], [x3], x1
uxtl v1.8h, v1.8b
uxtl v0.8h, v0.8b
ldr q2, intra_1_to_4
ldr q3, intra_m3_to_p4
dup v4.8h, v0.h[3] // T7
dup v5.8h, v0.h[7] // L7
add v4.8h, v4.8h, v5.8h
sub v0.8h, v0.8h, v1.8h
shl v4.8h, v4.8h, #4 // v4.8h is a
mul v0.8h, v0.8h, v2.8h // v0.h[0-3] is H, v0.h[4-7] is V
saddlp v0.4s, v0.8h
addp v0.4s, v0.4s, v0.4s // v0.s[0] is H, v0.s[1] is V
sqrshrn v0.4h, v0.4s, #5
dup v1.8h, v0.h[0] // v1.8h is b
dup v0.8h, v0.h[1] // v0.8h is c
mla v4.8h, v1.8h, v3.8h // a + b*(x-3)
mla v4.8h, v0.8h, v3.h[0] // + c*(-3): accumulator for row 0
sqrshrun v1.8b, v4.8h, #5
st1 {v1.8b}, [x0], x1
// each following row adds c once to the accumulator
.rept 7
add v4.8h, v4.8h, v0.8h
sqrshrun v1.8b, v4.8h, #5
st1 {v1.8b}, [x0], x1
.endr
WELS_ASM_AARCH64_FUNC_END
//for Luma 16x16
// Luma 16x16 intra prediction, vertical mode: the 16 bytes one stride above
// x0 are copied into each of the 16 rows starting at x0.
//   x0: address of the first row to write (advanced by the stride per row)
//   w1: row stride in bytes (sign-extended into x1)
WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI16x16LumaPredV_AArch64_neon
sxtw x1, w1
sub x2, x0, x1
ld1 {v0.16b}, [x2]
.rept 16
st1 {v0.16b}, [x0], x1
.endr
WELS_ASM_AARCH64_FUNC_END
// Luma 16x16 intra prediction, horizontal mode: each of the 16 rows is filled
// with the byte immediately to its left.
//   x0: address of the first row to write (advanced by the stride per row)
//   w1: row stride in bytes (sign-extended into x1)
WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI16x16LumaPredH_AArch64_neon
sxtw x1, w1
sub x2, x0, #1
.rept 16
ld1r {v0.16b}, [x2], x1
st1 {v0.16b}, [x0], x1
.endr
WELS_ASM_AARCH64_FUNC_END
// Luma 16x16 intra prediction, DC mode using both neighbours: every pixel is
// set to (sum of the 16 bytes above + sum of the 16 bytes to the left + 16) >> 5.
//   x0: address of the first row to write (advanced by the stride per row)
//   w1: row stride in bytes (sign-extended into x1)
WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI16x16LumaPredDc_AArch64_neon
sxtw x1, w1
sub x2, x0, x1
sub x3, x0, #1
ld1 {v0.16b}, [x2] // top row
// left column, one byte per row
ld1 {v1.b}[0], [x3], x1
ld1 {v1.b}[1], [x3], x1
ld1 {v1.b}[2], [x3], x1
ld1 {v1.b}[3], [x3], x1
ld1 {v1.b}[4], [x3], x1
ld1 {v1.b}[5], [x3], x1
ld1 {v1.b}[6], [x3], x1
ld1 {v1.b}[7], [x3], x1
ld1 {v1.b}[8], [x3], x1
ld1 {v1.b}[9], [x3], x1
ld1 {v1.b}[10], [x3], x1
ld1 {v1.b}[11], [x3], x1
ld1 {v1.b}[12], [x3], x1
ld1 {v1.b}[13], [x3], x1
ld1 {v1.b}[14], [x3], x1
ld1 {v1.b}[15], [x3]
// reduce instruction: horizontal sums of top and left, then combine
uaddlv h0, v0.16b
uaddlv h1, v1.16b
add v0.8h, v0.8h, v1.8h
uqrshrn b0, h0, #5
dup v0.16b, v0.b[0]
.rept 16
st1 {v0.16b}, [x0], x1
.endr
WELS_ASM_AARCH64_FUNC_END
// Luma 16x16 intra prediction, DC mode using only the top neighbours: every
// pixel is set to (sum of the 16 bytes above + 8) >> 4.
//   x0: address of the first row to write (advanced by the stride per row)
//   w1: row stride in bytes (sign-extended into x1)
WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI16x16LumaPredDcTop_AArch64_neon
sxtw x1, w1
sub x2, x0, x1
ld1 {v0.16b}, [x2]
// reduce instruction: horizontal sum of the top row
uaddlv h0, v0.16b
uqrshrn v0.8b, v0.8h, 4
dup v0.16b, v0.b[0]
.rept 16
st1 {v0.16b}, [x0], x1
.endr
WELS_ASM_AARCH64_FUNC_END
// Luma 16x16 intra prediction, DC mode using only the left neighbours: every
// pixel is set to (sum of the 16 bytes to the left + 8) >> 4.
//   x0: address of the first row to write (advanced by the stride per row)
//   w1: row stride in bytes (sign-extended into x1)
WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI16x16LumaPredDcLeft_AArch64_neon
sxtw x1, w1
sub x3, x0, #1
// left column, one byte per row
ld1 {v1.b}[0], [x3], x1
ld1 {v1.b}[1], [x3], x1
ld1 {v1.b}[2], [x3], x1
ld1 {v1.b}[3], [x3], x1
ld1 {v1.b}[4], [x3], x1
ld1 {v1.b}[5], [x3], x1
ld1 {v1.b}[6], [x3], x1
ld1 {v1.b}[7], [x3], x1
ld1 {v1.b}[8], [x3], x1
ld1 {v1.b}[9], [x3], x1
ld1 {v1.b}[10], [x3], x1
ld1 {v1.b}[11], [x3], x1
ld1 {v1.b}[12], [x3], x1
ld1 {v1.b}[13], [x3], x1
ld1 {v1.b}[14], [x3], x1
ld1 {v1.b}[15], [x3]
// reduce instruction: horizontal sum of the left column
uaddlv h1, v1.16b
uqrshrn v0.8b, v1.8h, #4
dup v0.16b, v0.b[0]
.rept 16
st1 {v0.16b}, [x0], x1
.endr
WELS_ASM_AARCH64_FUNC_END
.align 4
// Weights 5*{1..8} applied to the horizontal and vertical gradients
intra_1_to_8: .short 5, 10, 15, 20, 25, 30, 35, 40
// Column offsets (x - 7) for x = 0..15; lane 0 (-7) is also used as the first row offset
intra_m7_to_p8: .short -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8
// Luma 16x16 intra prediction, plane mode.
// With T* = bytes of the row above, L* = bytes of the column to the left and
// LT = the top-left corner byte, the code computes
//   a = 16 * (T15 + L15)
//   b = (5 * H + 32) >> 6,  H = sum over i=1..8 of i * (T[7+i] - T[7-i]), T[-1] = LT
//   c = (5 * V + 32) >> 6,  V = same weighting applied to the left column
//   pred[y][x] = clip_u8((a + b*(x-7) + c*(y-7) + 16) >> 5)
//   x0: address of the first row to write (advanced by the stride per row)
//   w1: row stride in bytes (sign-extended into x1)
WELS_ASM_AARCH64_FUNC_BEGIN WelsDecoderI16x16LumaPredPlane_AArch64_neon
sxtw x1, w1
sub x2, x0, x1
sub x2, x2, #1 // x2 = address of the top-left corner
mov x3, x2
ld1 {v0.8b}, [x3] // v0: LT, T0..T6 (first 8 bytes of the top line, reversed below)
add x3, x3, #9 // skip T7; x3 now points at T8
rev64 v0.8b, v0.8b // reverse v0: T6, T5, ..., T0, LT
ld1 {v1.8b}, [x3] // v1: T8..T15
uxtl v0.8h, v0.8b // extend to 16 bit integer
uxtl v1.8h, v1.8b // extend to 16 bit integer
ld1 {v2.b}[7], [x2], x1
ld1 {v2.b}[6], [x2], x1
ld1 {v2.b}[5], [x2], x1
ld1 {v2.b}[4], [x2], x1
ld1 {v2.b}[3], [x2], x1
ld1 {v2.b}[2], [x2], x1
ld1 {v2.b}[1], [x2], x1
ld1 {v2.b}[0], [x2], x1 // v2.8b: L6, L5, ..., L0, LT (upper part of the left column, reversed)
add x2, x2, x1 // skip L7
ld1 {v3.b}[0], [x2], x1
ld1 {v3.b}[1], [x2], x1
ld1 {v3.b}[2], [x2], x1
ld1 {v3.b}[3], [x2], x1
ld1 {v3.b}[4], [x2], x1
ld1 {v3.b}[5], [x2], x1
ld1 {v3.b}[6], [x2], x1
ld1 {v3.b}[7], [x2] // v3.8b: L8..L15 (lower part of the left column)
uxtl v2.8h, v2.8b
uxtl v3.8h, v3.8b
sub v0.8h, v1.8h, v0.8h // T[8+i] - T[6-i]
sub v2.8h, v3.8h, v2.8h // L[8+i] - L[6-i]
ldr q4, intra_1_to_8
mul v0.8h, v0.8h, v4.8h
mul v2.8h, v2.8h, v4.8h
saddlv s0, v0.8h // 5 * H
saddlv s2, v2.8h // 5 * V
add v1.8h, v1.8h, v3.8h
sqrshrn v0.4h, v0.4S, #6 // b is in v0.h[0]
sqrshrn v2.4h, v2.4S, #6 // c is in v2.h[0]
shl v1.8h, v1.8h, #4 // a is in v1.h[7]
ldr q4, intra_m7_to_p8
ldr q5, intra_m7_to_p8 + 16
dup v1.8h, v1.h[7]
dup v3.8h, v1.h[7] // v1 already holds a in every lane, so v3 = a as well
mla v1.8h, v4.8h, v0.h[0] // columns 0-7:  a + b*(x-7)
mla v3.8h, v5.8h, v0.h[0] // columns 8-15: a + b*(x-7)
dup v2.8h, v2.h[0] // v2.8h is [cccccccc]
mla v1.8h, v2.8h, v4.h[0] // + c*(-7): accumulators for row 0
mla v3.8h, v2.8h, v4.h[0]
sqrshrun v4.8b, v1.8h, #5
sqrshrun2 v4.16b, v3.8h, #5
st1 {v4.16b}, [x0], x1
// each following row adds c once to both accumulators
.rept 15
add v1.8h, v1.8h, v2.8h
add v3.8h, v3.8h, v2.8h
sqrshrun v4.8b, v1.8h, #5
sqrshrun2 v4.16b, v3.8h, #5
st1 {v4.16b}, [x0], x1
.endr
WELS_ASM_AARCH64_FUNC_END
#endif

View File

@ -0,0 +1,199 @@
/*!
* \copy
* Copyright (c) 2009-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* \file au_parser.h
*
* \brief Interfaces introduced in Access Unit level based parser
*
* \date 03/10/2009 Created
*
*************************************************************************************
*/
#ifndef WELS_ACCESS_UNIT_PARSER_H__
#define WELS_ACCESS_UNIT_PARSER_H__
#include "typedefs.h"
#include "wels_common_basis.h"
#include "nal_prefix.h"
#include "dec_frame.h"
#include "bit_stream.h"
#include "parameter_sets.h"
#include "decoder_context.h"
namespace WelsDec {
/*!
*************************************************************************************
* \brief Start Code Prefix (0x 00 00 00 01) detection
*
* \param kpBuf bitstream payload buffer
* \param pOffset offset between NAL rbsp and original bitstream that
* start code prefix is separated from.
* \param iBufSize count size of buffer
*
* \return RBSP buffer of start code prefix exclusive
*
* \note N/A
*************************************************************************************
*/
uint8_t* DetectStartCodePrefix (const uint8_t* kpBuf, int32_t* pOffset, int32_t iBufSize);
/*!
*************************************************************************************
* \brief to parse network abstraction layer unit,
* escape emulation_prevention_three_byte within it
* (former name is parse_nal)
*
* \param pCtx decoder context
* \param pNalUnitHeader parsed result of NAL Unit Header to output
* \param pSrcRbsp bitstream buffer to input
* \param iSrcRbspLen length size of bitstream buffer payload
* \param pSrcNal NOTE(review): undocumented in the original; presumably the raw source NAL buffer - confirm
* \param iSrcNalLen NOTE(review): undocumented in the original; presumably the length of pSrcNal - confirm
* \param pConsumedBytes consumed bytes during parsing
*
* \return decoded bytes payload, might be (pSrcRbsp+1) if no escapes
*
* \note N/A
*************************************************************************************
*/
uint8_t* ParseNalHeader (PWelsDecoderContext pCtx, SNalUnitHeader* pNalUnitHeader, uint8_t* pSrcRbsp,
int32_t iSrcRbspLen, uint8_t* pSrcNal, int32_t iSrcNalLen, int32_t* pConsumedBytes);
// NOTE(review): the following declarations carry no documentation in this header;
// see their definitions for the exact contracts.
int32_t ParseNonVclNal (PWelsDecoderContext pCtx, uint8_t* pRbsp, const int32_t kiSrcLen, uint8_t* pSrcNal,
const int32_t kSrcNalLen);
int32_t ParseRefBasePicMarking (PBitStringAux pBs, PRefBasePicMarking pRefBasePicMarking);
int32_t ParsePrefixNalUnit (PWelsDecoderContext pCtx, PBitStringAux pBs);
bool CheckAccessUnitBoundary (PWelsDecoderContext pCtx, const PNalUnit kpCurNal, const PNalUnit kpLastNal,
const PSps kpSps);
bool CheckAccessUnitBoundaryExt (PNalUnitHeaderExt pLastNalHdrExt, PNalUnitHeaderExt pCurNalHeaderExt,
PSliceHeader pLastSliceHeader, PSliceHeader pCurSliceHeader);
bool CheckNextAuNewSeq (PWelsDecoderContext pCtx, const PNalUnit kpCurNal, const PSps kpSps);
/*!
*************************************************************************************
* \brief to parse Sequence Parameter Set (SPS)
*
* \param pCtx Decoder context
* \param pBsAux bitstream reader auxiliary
* \param pPicWidth picture width current Sps represented
* \param pPicHeight picture height current Sps represented
* \param pSrcNal NOTE(review): undocumented in the original; presumably the raw source NAL buffer - confirm
* \param kSrcNalLen NOTE(review): undocumented in the original; presumably the length of pSrcNal - confirm
*
* \return 0 - succeeded
* 1 - failed
*
* \note Call it in case eNalUnitType is SPS.
*************************************************************************************
*/
int32_t ParseSps (PWelsDecoderContext pCtx, PBitStringAux pBsAux, int32_t* pPicWidth, int32_t* pPicHeight,
uint8_t* pSrcNal, const int32_t kSrcNalLen);
/*!
*************************************************************************************
* \brief to parse Picture Parameter Set (PPS)
*
* \param pCtx Decoder context
* \param pPpsList pps list
* \param pBsAux bitstream reader auxiliary
* \param pSrcNal NOTE(review): undocumented in the original; presumably the raw source NAL buffer - confirm
* \param kSrcNalLen NOTE(review): undocumented in the original; presumably the length of pSrcNal - confirm
*
* \return 0 - succeeded
* 1 - failed
*
* \note Call it in case eNalUnitType is PPS.
*************************************************************************************
*/
int32_t ParsePps (PWelsDecoderContext pCtx, PPps pPpsList, PBitStringAux pBsAux, uint8_t* pSrcNal,
const int32_t kSrcNalLen);
/*!
*************************************************************************************
* \brief to parse Video Usability Information (VUI) parameter of the SPS
*
* \param pCtx Decoder context
* \param pSps the sps which current Vui parameter belongs to
* \param pBsAux bitstream reader auxiliary
*
* \return 0 - succeeded
* 1 - failed
*
* \note Call it in case the flag "vui_parameters_present_flag" in sps is true.
*************************************************************************************
*/
int32_t ParseVui (PWelsDecoderContext pCtx, PSps pSps, PBitStringAux pBsAux);
/*!
*************************************************************************************
* \brief to parse scaling list message payload
*
* \param pScalingList scaling list matrix to be filled with the parsed output
* (SPS or PPS scaling list, per the original description)
* \param pBsAux bitstream reader auxiliary
*
* \return 0 - succeeded
* 1 - failed
*
* \note Call it in case scaling matrix present at sps or pps.
* NOTE(review): the remaining parameters of SetScalingListValue and all
* parameters of ParseScalingList are not described in the original
* documentation; see the definitions.
*************************************************************************************
*/
int32_t SetScalingListValue (uint8_t* pScalingList, int iScalingListNum, bool* bUseDefaultScalingMatrixFlag,
PBitStringAux pBsAux);
int32_t ParseScalingList (PSps pSps, PBitStringAux pBs, bool bPPS, const bool kbTrans8x8ModeFlag,
bool* bScalingListPresentFlag, uint8_t (*iScalingList4x4)[16], uint8_t (*iScalingList8x8)[64]);
/*!
*************************************************************************************
* \brief to parse SEI message payload
*
* \param pSei sei message to be parsed output
* \param pBsAux bitstream reader auxiliary
*
* \return 0 - succeeded
* 1 - failed
*
* \note Call it in case eNalUnitType is NAL_UNIT_SEI.
*************************************************************************************
*/
int32_t ParseSei (void* pSei, PBitStringAux pBsAux); // reserved Sei_Msg type
/*!
*************************************************************************************
* \brief reset the fmo list because an Sps has just been received
*
* \param pCtx decoder context
*
* \return count number of fmo context units that are reset
*************************************************************************************
*/
int32_t ResetFmoList (PWelsDecoderContext pCtx);
} // namespace WelsDec
#endif//WELS_ACCESS_UNIT_PARSER_H__

View File

@ -0,0 +1,62 @@
/*!
* \copy
* Copyright (c) 2004-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
//bit_stream.h - auxiliary data and helpers for bit-stream reading and/or writing
#ifndef WELS_BIT_STREAM_H__
#define WELS_BIT_STREAM_H__
#include "typedefs.h"
#include "wels_common_defs.h"
#include "golomb_common.h"
using namespace WelsCommon;
namespace WelsDec {
/*!
* \brief input bits for decoder or initialize bitstream writing in encoder
*
* \param pBitString Bit string auxiliary pointer
* \param kpBuf bit-stream buffer
* \param kiSize size in bits for decoder; size in bytes for encoder
*
* \return size of buffer data in byte; failed in -1 return
*/
int32_t DecInitBits (PBitStringAux pBitString, const uint8_t* kpBuf, const int32_t kiSize);
/*!
* \brief NOTE(review): undocumented in the original header; by its name it
* (re)initialises bit reading on pBitString - confirm against the definition.
*
* \param pBitString Bit string auxiliary pointer
* \param iEndOffset NOTE(review): meaning not visible from this header - confirm
*/
int32_t InitReadBits (PBitStringAux pBitString, intX_t iEndOffset);
/*!
* \brief NOTE(review): undocumented in the original header; by its name it
* converts an RBSP buffer into an EBSP buffer - confirm against the definition.
*
* \param pDstBuf destination buffer
* \param pSrcBuf source buffer
* \param kiSize size argument; units and which buffer it refers to are not
* visible from this header - confirm
*/
void RBSP2EBSP (uint8_t* pDstBuf, uint8_t* pSrcBuf, const int32_t kiSize);
} // namespace WelsDec
#endif//WELS_BIT_STREAM_H__

View File

@ -0,0 +1,111 @@
/*!
* \copy
* Copyright (c) 2009-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* \file cabac_decoder.h
*
* \brief Interfaces introduced for cabac decoder
*
* \date 10/10/2014 Created
*
*************************************************************************************
*/
#ifndef WELS_CABAC_DECODER_H__
#define WELS_CABAC_DECODER_H__
#include "decoder_context.h"
#include "error_code.h"
#include "wels_common_defs.h"
namespace WelsDec {
static const uint8_t g_kRenormTable256[256] = {
6, 6, 6, 6, 6, 6, 6, 6,
5, 5, 5, 5, 5, 5, 5, 5,
4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4,
3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3,
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1
};
// CABAC decoding interface. The handle types used below (PWelsDecoderContext,
// PWelsCabacDecEngine, PWelsCabacCtx, SBitStringAux) come from the headers included above.
// NOTE(review): the int32_t results are presumably ERR_* status codes (0 meaning success),
// matching how WELS_CABAC_FALSE_RETURN treats a non-zero value -- confirm in cabac_decoder.cpp.
//1. CABAC context initialization
void WelsCabacGlobalInit (PWelsDecoderContext pCabacCtx);
// eSliceType / iCabacInitIdc / iQp select the slice type, cabac_init_idc and QP to initialize for (per parameter names).
void WelsCabacContextInit (PWelsDecoderContext pCtx, uint8_t eSliceType, int32_t iCabacInitIdc, int32_t iQp);
//2. decoding Engine initialization
// Engine <-> bit-string hand-over: one routine initializes the engine from pBsAux, the other restores pBsAux from it.
int32_t InitCabacDecEngineFromBS (PWelsCabacDecEngine pDecEngine, SBitStringAux* pBsAux);
void RestoreCabacDecEngineToBS (PWelsCabacDecEngine pDecEngine, SBitStringAux* pBsAux);
//3. actual decoding
// Results are delivered through the reference out-parameters (uiValue/iNumBitsRead, uiBit, uiBinVal).
int32_t Read32BitsCabac (PWelsCabacDecEngine pDecEngine, uint32_t& uiValue, int32_t& iNumBitsRead);
int32_t DecodeBinCabac (PWelsCabacDecEngine pDecEngine, PWelsCabacCtx pBinCtx, uint32_t& uiBit);
int32_t DecodeBypassCabac (PWelsCabacDecEngine pDecEngine, uint32_t& uiBinVal);
int32_t DecodeTerminateCabac (PWelsCabacDecEngine pDecEngine, uint32_t& uiBinVal);
//4. unary parsing
// The decoded symbol is written to uiSymVal.
int32_t DecodeUnaryBinCabac (PWelsCabacDecEngine pDecEngine, PWelsCabacCtx pBinCtx, int32_t iCtxOffset,
uint32_t& uiSymVal);
//5. EXGk parsing
int32_t DecodeExpBypassCabac (PWelsCabacDecEngine pDecEngine, int32_t iCount, uint32_t& uiSymVal);
// NOTE(review): declared with a uint32_t result while the sibling decode routines return int32_t.
uint32_t DecodeUEGLevelCabac (PWelsCabacDecEngine pDecEngine, PWelsCabacCtx pBinCtx, uint32_t& uiBinVal);
int32_t DecodeUEGMvCabac (PWelsCabacDecEngine pDecEngine, PWelsCabacCtx pBinCtx, uint32_t iMaxC, uint32_t& uiCode);
// Numeric constants of the CABAC decoding engine.
// NOTE(review): presumably the "half" and "quarter" marks of the arithmetic-decoder range --
// confirm against their uses in cabac_decoder.cpp.
#define WELS_CABAC_HALF 0x01FE
#define WELS_CABAC_QUARTER 0x0100
// Early-return helper: evaluates iErrorInfo exactly ONCE and, when the result is non-zero,
// returns it from the enclosing function. The value is captured in a block-local temporary so
// that an argument with side effects (e.g. a decode call) is not executed a second time by the
// return statement, and the argument is parenthesized so compound expressions are safe.
// The macro expands to a braced block, so it works with or without a trailing semicolon.
#define WELS_CABAC_FALSE_RETURN(iErrorInfo) \
{ \
  const int32_t iCabacErrTmp = (int32_t) (iErrorInfo); \
  if (iCabacErrTmp) { \
    return iCabacErrTmp; \
  } \
}
}
#endif

View File

@ -0,0 +1,118 @@
/*!
* \copy
* Copyright (c) 2009-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file deblocking.h
*
* \brief Interfaces introduced in frame deblocking filtering
*
* \date 05/14/2009 Created
*
*************************************************************************************
*/
#ifndef WELS_DEBLOCKING_H__
#define WELS_DEBLOCKING_H__
#include "decoder_context.h"
#include "deblocking_common.h"
namespace WelsDec {
/*!
* \brief deblocking module initialize
*
* \param pDeblockingFunc deblocking function set to initialize
*        (presumably filled with routines chosen according to iCpu -- confirm in deblocking.cpp)
* \param iCpu cpu indication (presumably CPU capability flags -- confirm)
*
* \return NONE
*/
void DeblockingInit (PDeblockingFunc pDeblockingFunc, int32_t iCpu);
/*!
* \brief deblocking filtering target slice
*
* \param pCtx Wels decoder context
* \param pDeblockMb macroblock-level deblocking routine of type PDeblockingFilterMbFunc
*        (presumably invoked for each macroblock of the slice -- confirm in deblocking.cpp)
*
* \return NONE
*/
void WelsDeblockingFilterSlice (PWelsDecoderContext pCtx, PDeblockingFilterMbFunc pDeblockMb);
/*!
* \brief AVC slice init deblocking filtering target layer
*
* \param pCtx Wels decoder context
* \param pFilter in/out: SDeblockingFilter state, passed by reference
* \param iFilterIdc in/out: filter idc, passed by reference
*
* \return NONE
*/
void WelsDeblockingInitFilter (PWelsDecoderContext pCtx, SDeblockingFilter& pFilter, int32_t& iFilterIdc);
/*!
* \brief AVC MB deblocking filtering target layer
*
* \param pCurDqLayer DqLayer which has the current location of MB to be deblocked.
* \param pFilter SDeblockingFilter state, passed by reference
* \param iFilterIdc filter idc, passed by reference
* \param pDeblockMb macroblock-level deblocking routine of type PDeblockingFilterMbFunc
*
* \return NONE
*/
void WelsDeblockingFilterMB (PDqLayer pCurDqLayer, SDeblockingFilter& pFilter, int32_t& iFilterIdc,
PDeblockingFilterMbFunc pDeblockMb);
/*!
* \brief boundary-strength (Bs) helpers and per-MB deblocking entry points
*
* NOTE(review): the notes below follow the declared parameter names only; the exact
* semantics should be confirmed against deblocking.cpp.
*
* \param pFilter deblocking filter state
* \param pCurDqLayer DQ layer holding the macroblocks involved
* \param iEdge edge selector
* \param iNeighMb index of the neighbouring macroblock
* \param iMbXy index of the current macroblock
* \param iFilterIdc filter idc
* \param iBoundryFlag boundary flag
*
* \return the two *BsMarginalMBAvcbase routines return a uint32_t
*         (presumably packed boundary strengths -- confirm);
*         DeblockingAvailableNoInterlayer returns an int32_t; WelsDeblockingMb returns NONE
*/
uint32_t DeblockingBsMarginalMBAvcbase (PDeblockingFilter pFilter, PDqLayer pCurDqLayer, int32_t iEdge,
int32_t iNeighMb, int32_t iMbXy);
uint32_t DeblockingBSliceBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdge, int32_t iNeighMb, int32_t iMbXy);
int32_t DeblockingAvailableNoInterlayer (PDqLayer pCurDqLayer, int32_t iFilterIdc);
void WelsDeblockingMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t iBoundryFlag);
/*!
* \brief fetch the pNzc entry of one macroblock
*
* The table attached to the layer's pDec picture is used when both pDec and pDec->pNzc
* are non-NULL; otherwise the layer's own pNzc table is used.
*
* \param pCurDqLayer layer to read from
* \param iMbXy index of the macroblock entry
*
* \return pointer to the first element of the selected pNzc[iMbXy] entry
*/
inline int8_t* GetPNzc (PDqLayer pCurDqLayer, int32_t iMbXy) {
  const bool kbNoPictureNzc = (pCurDqLayer->pDec == NULL) || (pCurDqLayer->pDec->pNzc == NULL);
  if (kbNoPictureNzc) {
    return pCurDqLayer->pNzc[iMbXy];
  }
  return pCurDqLayer->pDec->pNzc[iMbXy];
}
} // namespace WelsDec
#endif //WELS_DEBLOCKING_H__

View File

@ -0,0 +1,156 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
//dec_frame.h
#ifndef WELS_DEC_FRAME_H__
#define WELS_DEC_FRAME_H__
#include "typedefs.h"
#include "wels_const.h"
#include "wels_common_basis.h"
#include "parameter_sets.h"
#include "nal_prefix.h"
#include "slice.h"
#include "picture.h"
#include "bit_stream.h"
#include "fmo.h"
namespace WelsDec {
///////////////////////////////////DQ Layer level///////////////////////////////////
// Forward typedefs for the DQ layer structure; struct TagDqLayer itself is defined further below.
typedef struct TagDqLayer SDqLayer;
typedef SDqLayer* PDqLayer;
// Per-layer header information: the NAL header extension, the slice and the parameter sets in use.
typedef struct TagLayerInfo {
SNalUnitHeaderExt sNalHeaderExt;
SSlice sSliceInLayer; // Here Slice identify to Frame on concept
PSubsetSps pSubsetSps; // current pSubsetSps used, memory alloc in external
PSps pSps; // current sps based avc used, memory alloc in external
PPps pPps; // current pps used
} SLayerInfo, *PLayerInfo;
/* Layer Representation */
// Decoding state of one DQ layer: layer/slice header info, bit-string and FMO handles,
// pointer members to per-macroblock data, the position of the current macroblock,
// syntax elements shared by all slices of the layer, and reference/reconstruction pictures.
// NOTE(review): the pointer members are presumably arrays indexed by macroblock
// (GetPNzc in deblocking.h indexes pNzc with an MB index) -- confirm where they are allocated.
struct TagDqLayer {
SLayerInfo sLayerInfo;
PBitStringAux pBitStringAux; // pointer to SBitStringAux
PFmo pFmo; // Current fmo context pointer used
uint32_t* pMbType;
int32_t* pSliceIdc; // using int32_t for slice_idc
int16_t (*pMv[LIST_A])[MB_BLOCK4x4_NUM][MV_A];
int16_t (*pMvd[LIST_A])[MB_BLOCK4x4_NUM][MV_A];
int8_t (*pRefIndex[LIST_A])[MB_BLOCK4x4_NUM];
int8_t (*pDirect)[MB_BLOCK4x4_NUM];
bool* pNoSubMbPartSizeLessThan8x8Flag;
bool* pTransformSize8x8Flag;
int8_t* pLumaQp;
int8_t (*pChromaQp)[2];
int8_t* pCbp;
uint16_t *pCbfDc;
int8_t (*pNzc)[24];
int8_t (*pNzcRs)[24];
int8_t* pResidualPredFlag;
int8_t* pInterPredictionDoneFlag;
bool* pMbCorrectlyDecodedFlag;
bool* pMbRefConcealedFlag;
int16_t (*pScaledTCoeff)[MB_COEFF_LIST_SIZE];
int8_t (*pIntraPredMode)[8]; //0~3 top4x4 ; 4~6 left 4x4; 7 intra16x16
int8_t (*pIntra4x4FinalMode)[MB_BLOCK4x4_NUM];
uint8_t *pIntraNxNAvailFlag;
int8_t* pChromaPredMode;
//uint8_t (*motion_pred_flag[LIST_A])[MB_PARTITION_SIZE]; // 8x8
uint32_t (*pSubMbType)[MB_SUB_PARTITION_SIZE];
int32_t iLumaStride;
int32_t iChromaStride;
uint8_t* pPred[3];
// Position of the current macroblock.
int32_t iMbX;
int32_t iMbY;
int32_t iMbXyIndex;
int32_t iMbWidth; // MB width of this picture, equal to sSps.iMbWidth
int32_t iMbHeight; // MB height of this picture, equal to sSps.iMbHeight;
/* Common syntax elements across all slices of a DQLayer */
int32_t iSliceIdcBackup;
uint32_t uiSpsId;
uint32_t uiPpsId;
uint32_t uiDisableInterLayerDeblockingFilterIdc;
int32_t iInterLayerSliceAlphaC0Offset;
int32_t iInterLayerSliceBetaOffset;
//SPosOffset sScaledRefLayer;
int32_t iSliceGroupChangeCycle;
PRefPicListReorderSyn pRefPicListReordering;
PPredWeightTabSyn pPredWeightTable;
PRefPicMarking pRefPicMarking; // Decoded reference picture marking syntax
PRefBasePicMarking pRefPicBaseMarking;
PPicture pRef; // reference picture pointer
PPicture pDec; // reconstruction picture pointer for layer
int16_t iColocMv[2][16][2]; //Colocated MV cache
int8_t iColocRefIndex[2][16]; //Colocated RefIndex cache
int8_t iColocIntra[16]; //Colocated Intra cache
bool bUseWeightPredictionFlag;
bool bUseWeightedBiPredIdc;
bool bStoreRefBasePicFlag; // iCurTid == 0 && iCurQid = 0 && bEncodeKeyPic = 1
bool bTCoeffLevelPredFlag;
bool bConstrainedIntraResamplingFlag;
uint8_t uiRefLayerDqId;
uint8_t uiRefLayerChromaPhaseXPlus1Flag;
uint8_t uiRefLayerChromaPhaseYPlus1;
uint8_t uiLayerDqId; // dq_id of current layer
bool bUseRefBasePicFlag; // whether reference pic or reference base pic is referred?
};
// Reduced layer structure: it carries a subset of the members of struct TagDqLayer
// (layer info, bit-string handle, a few per-MB pointers and the MB position/dimensions).
// NOTE(review): the name suggests use by a GPU decoding path -- confirm where it is referenced.
typedef struct TagGpuAvcLayer {
SLayerInfo sLayerInfo;
PBitStringAux pBitStringAux; // pointer to SBitStringAux
uint32_t* pMbType;
int32_t* pSliceIdc; // using int32_t for slice_idc
int8_t* pLumaQp;
int8_t* pCbp;
int8_t (*pNzc)[24];
int8_t (*pIntraPredMode)[8]; //0~3 top4x4 ; 4~6 left 4x4; 7 intra16x16
int32_t iMbX;
int32_t iMbY;
int32_t iMbXyIndex;
int32_t iMbWidth; // MB width of this picture, equal to sSps.iMbWidth
int32_t iMbHeight; // MB height of this picture, equal to sSps.iMbHeight;
} SGpuAvcDqLayer, *PGpuAvcDqLayer;
///////////////////////////////////////////////////////////////////////
} // namespace WelsDec
#endif//WELS_DEC_FRAME_H__

View File

@ -0,0 +1,344 @@
/*!
* \copy
* Copyright (c) 2009-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file golomb.h
*
* \brief Exponential Golomb entropy coding/decoding routine
*
* \date 03/13/2009 Created
*
*************************************************************************************
*/
#ifndef WELS_EXPONENTIAL_GOLOMB_ENTROPY_CODING_H__
#define WELS_EXPONENTIAL_GOLOMB_ENTROPY_CODING_H__
#include "typedefs.h"
#include "bit_stream.h"
#include "macros.h"
//#include <assert.h>
#include "ls_defines.h"
#include "error_code.h"
namespace WelsDec {
// Evaluates uiRet once, converts the whole expression to uint32_t and, when the result differs
// from ERR_NONE, returns it from the enclosing function.
// The argument is parenthesized so that the cast applies to the complete expression rather than
// only to its first operand when a compound expression is passed.
#define WELS_READ_VERIFY(uiRet) do{ \
  uint32_t uiRetTmp = (uint32_t)(uiRet); \
  if( uiRetTmp != ERR_NONE ) \
    return uiRetTmp; \
}while(0)
// Bit-window maintenance macros. They operate directly on the lvalues passed in and may
// `return` from the enclosing function, so they can only be used inside functions whose
// return type accepts ERR_INFO_READ_OVERFLOW.
//
// GET_WORD: returns ERR_INFO_READ_OVERFLOW when iReadBytes > iAllowedBytes+1; otherwise ORs the
// next two bytes (pBufPtr[0] as the high byte) into iCurBits shifted left by iLeftBits,
// subtracts 16 from iLeftBits and advances pBufPtr by 2.
#define GET_WORD(iCurBits, pBufPtr, iLeftBits, iAllowedBytes, iReadBytes) { \
if (iReadBytes > iAllowedBytes+1) { \
return ERR_INFO_READ_OVERFLOW; \
} \
iCurBits |= ((uint32_t)((pBufPtr[0] << 8) | pBufPtr[1])) << (iLeftBits); \
iLeftBits -= 16; \
pBufPtr +=2; \
}
// NEED_BITS: fetches one more 16-bit word via GET_WORD when iLeftBits is positive.
#define NEED_BITS(iCurBits, pBufPtr, iLeftBits, iAllowedBytes, iReadBytes) { \
if (iLeftBits > 0) { \
GET_WORD(iCurBits, pBufPtr, iLeftBits, iAllowedBytes, iReadBytes); \
} \
}
// UBITS: the top iNumBits bits of a 32-bit iCurBits. iNumBits must be in 1..32: with 0 the
// shift count becomes 32, which is undefined for a 32-bit operand.
#define UBITS(iCurBits, iNumBits) (iCurBits>>(32-(iNumBits)))
// DUMP_BITS: discards iNumBits bits from the front of the window (shifts iCurBits left and adds
// iNumBits to iLeftBits), then refills through NEED_BITS.
#define DUMP_BITS(iCurBits, pBufPtr, iLeftBits, iNumBits, iAllowedBytes, iReadBytes) { \
iCurBits <<= (iNumBits); \
iLeftBits += (iNumBits); \
NEED_BITS(iCurBits, pBufPtr, iLeftBits, iAllowedBytes, iReadBytes); \
}
/*
 * Read iNumBits bits from the bit stream.
 * The bits are peeked from the top of pBs->uiCurBits and then consumed with DUMP_BITS, which
 * returns ERR_INFO_READ_OVERFLOW from this function when the read position is out of bounds.
 * *pCode is written only on success; the function then returns ERR_NONE.
 */
static inline int32_t BsGetBits (PBitStringAux pBs, int32_t iNumBits, uint32_t* pCode) {
  const intX_t kiStreamBytes = pBs->pEndBuf - pBs->pStartBuf; //actual stream bytes
  const intX_t kiConsumedBytes = pBs->pCurBuf - pBs->pStartBuf;
  const intX_t kiPeeked = UBITS (pBs->uiCurBits, iNumBits);

  DUMP_BITS (pBs->uiCurBits, pBs->pCurBuf, pBs->iLeftBits, iNumBits, kiStreamBytes, kiConsumedBytes);

  *pCode = (uint32_t)kiPeeked;
  return ERR_NONE;
}
/*
* Exponential Golomb codes decoding routines
*/
// Lookup tables shared across modules to reduce the size of the generated binary (12/10/2009).
// They are only declared here; the definitions live in another translation unit.
extern const uint8_t g_kuiIntra4x4CbpTable[48];
extern const uint8_t g_kuiIntra4x4CbpTable400[16];
extern const uint8_t g_kuiInterCbpTable[48];
extern const uint8_t g_kuiInterCbpTable400[16];
extern const uint8_t g_kuiLeadingZeroTable[256];
// Index of the highest set bit for the values 1..15 (entry 0 is 0).
static const uint32_t g_kuiPrefix8BitsTable[16] = {
  0, 0, 1, 1,
  2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3
};
/*
 * Returns 32 minus the index of the highest set bit of uiValue
 * (so both 0 and 1 give 32, and 0x80000000 gives 1).
 * The value is narrowed by 16, 8 and 4 bits in turn; the final 4-bit remainder is
 * resolved through g_kuiPrefix8BitsTable.
 */
static inline uint32_t GetPrefixBits (uint32_t uiValue) {
  uint32_t uiTopBit = 0;
  for (uint32_t uiStep = 16; uiStep >= 4; uiStep >>= 1) {
    const uint32_t kuiUpper = uiValue >> uiStep;
    if (kuiUpper != 0) {
      uiValue = kuiUpper;
      uiTopBit += uiStep;
    }
  }
  uiTopBit += g_kuiPrefix8BitsTable[uiValue];
  return 32 - uiTopBit;
}
/*
 * Read a single bit from the bit stream into *pCode.
 * Forwards to BsGetBits with a bit count of 1 and hands back its status.
 */
static inline uint32_t BsGetOneBit (PBitStringAux pBs, uint32_t* pCode) {
  const int32_t kiStatus = BsGetBits (pBs, 1, pCode);
  return kiStatus;
}
/*
 * Leading-zero count of the 32-bit window iCurBits, or -1 when the window is all zero.
 * The window is probed through its top 8, 16, 24 and finally all 32 bits. The first non-zero
 * prefix is looked up in g_kuiLeadingZeroTable and the number of all-zero bytes already skipped
 * (0, 8, 16 or 24 bits) is added.
 * Assumes g_kuiLeadingZeroTable[v] holds the leading-zero count of the 8-bit value v -- the
 * table is defined in another translation unit.
 */
static inline int32_t GetLeadingZeroBits (uint32_t iCurBits) { //<=32 bits
  for (int32_t iProbeBits = 8; iProbeBits <= 32; iProbeBits += 8) {
    const uint32_t kuiPrefix = UBITS (iCurBits, iProbeBits); //ShowBits( bs, iProbeBits );
    if (kuiPrefix != 0) {
      return g_kuiLeadingZeroTable[kuiPrefix] + (iProbeBits - 8);
    }
  }
  //ASSERT(false); // should not go here
  return -1;
}
/*
* Read an unsigned exp golomb code.
* Counts the leading zero bits of the current window, consumes them together with the
* following bit, then (when there is a prefix) reads that many suffix bits.
* On success *pCode = (1 << leadingZeroBits) - 1 + suffix and ERR_NONE is returned.
* Returns ERR_INFO_READ_LEADING_ZERO when the window holds no set bit; the DUMP_BITS macro
* may also return ERR_INFO_READ_OVERFLOW from inside this function.
*/
static inline uint32_t BsGetUe (PBitStringAux pBs, uint32_t* pCode) {
uint32_t iValue = 0;
int32_t iLeadingZeroBits = GetLeadingZeroBits (pBs->uiCurBits);
intX_t iAllowedBytes, iReadBytes;
iAllowedBytes = pBs->pEndBuf - pBs->pStartBuf; //actual stream bytes
if (iLeadingZeroBits == -1) { //bitstream error
return ERR_INFO_READ_LEADING_ZERO;//-1
} else if (iLeadingZeroBits >
16) { //rarely into this condition (even may be bitstream error), prevent from 16-bit reading overflow
//using two-step reading instead of one time reading of >16 bits.
// iReadBytes is recomputed before every DUMP_BITS because the macro advances pBs->pCurBuf.
iReadBytes = pBs->pCurBuf - pBs->pStartBuf;
DUMP_BITS (pBs->uiCurBits, pBs->pCurBuf, pBs->iLeftBits, 16, iAllowedBytes, iReadBytes);
iReadBytes = pBs->pCurBuf - pBs->pStartBuf;
DUMP_BITS (pBs->uiCurBits, pBs->pCurBuf, pBs->iLeftBits, iLeadingZeroBits + 1 - 16, iAllowedBytes, iReadBytes);
} else {
iReadBytes = pBs->pCurBuf - pBs->pStartBuf;
DUMP_BITS (pBs->uiCurBits, pBs->pCurBuf, pBs->iLeftBits, iLeadingZeroBits + 1, iAllowedBytes, iReadBytes);
}
// A prefix of zero length has no suffix bits; skipping the read also avoids UBITS with a count of 0.
if (iLeadingZeroBits) {
iValue = UBITS (pBs->uiCurBits, iLeadingZeroBits);
iReadBytes = pBs->pCurBuf - pBs->pStartBuf;
DUMP_BITS (pBs->uiCurBits, pBs->pCurBuf, pBs->iLeftBits, iLeadingZeroBits, iAllowedBytes, iReadBytes);
}
*pCode = ((1u << iLeadingZeroBits) - 1 + iValue);
return ERR_NONE;
}
/*
 * Read signed exp golomb codes
 *
 * The unsigned code number k is read with BsGetUe (its error code is propagated).
 * An odd k yields (k + 1) >> 1; an even k yields NEG_NUM (k >> 1).
 */
static inline int32_t BsGetSe (PBitStringAux pBs, int32_t* pCode) {
  uint32_t uiUnsignedCode;
  WELS_READ_VERIFY (BsGetUe (pBs, &uiUnsignedCode));

  const bool kbOddCode = (uiUnsignedCode & 0x01) != 0;
  if (!kbOddCode) {
    *pCode = NEG_NUM ((int32_t) (uiUnsignedCode >> 1));
    return ERR_NONE;
  }
  *pCode = (int32_t) ((uiUnsignedCode + 1) >> 1);
  return ERR_NONE;
}
/*
 * Get unsigned truncated exp golomb code.
 *
 * iRange == 1: nothing is read, *pCode is 0.
 * iRange == 2: one bit is read and inverted.
 * otherwise  : a regular unsigned exp golomb code is read.
 * Read errors are propagated; ERR_NONE is returned on success.
 */
static inline int32_t BsGetTe0 (PBitStringAux pBs, int32_t iRange, uint32_t* pCode) {
  switch (iRange) {
  case 1:
    *pCode = 0;
    break;
  case 2:
    WELS_READ_VERIFY (BsGetOneBit (pBs, pCode));
    *pCode ^= 1;
    break;
  default:
    WELS_READ_VERIFY (BsGetUe (pBs, pCode));
    break;
  }
  return ERR_NONE;
}
/*
 * Get number of trailing bits
 *
 * Returns the position (counted from the least significant bit) of the lowest set bit of the
 * byte at pBuf, i.e. the number of zero bits below it. A byte without any set bit yields 0.
 */
static inline int32_t BsGetTrailingBits (uint8_t* pBuf) {
  // TODO
  const uint32_t kuiByte = *pBuf;
  for (int32_t iBitPos = 0; iBitPos < 9; ++iBitPos) {
    if ((kuiByte >> iBitPos) & 1) {
      return iBitPos;
    }
  }
  return 0;
}
/*
 * Check whether there is more rbsp data for processing
 *
 * Compares the total bit count (iBits) with the bits consumed so far, derived from the buffer
 * position and iLeftBits; reports true when more than one bit remains.
 */
static inline bool CheckMoreRBSPData (PBitStringAux pBsAux) {
  const bool kbMoreData =
    (pBsAux->iBits - ((pBsAux->pCurBuf - pBsAux->pStartBuf - 2) << 3) - pBsAux->iLeftBits) > 1;
  return kbMoreData;
}
//define macros to check syntax elements
// Error variants: when val is out of range, log it through WelsLog at WELS_LOG_ERROR and return
// ret_code from the enclosing function. A variable named pCtx (with an sLogCtx member) must be in
// scope at the point of use, and syntax_name must be a string literal (it is concatenated into
// the format string).
// Every macro argument is parenthesized so that compound expressions (e.g. `a & b`) compare as a
// whole instead of being regrouped by operator precedence.
#define WELS_CHECK_SE_BOTH_ERROR(val, lower_bound, upper_bound, syntax_name, ret_code) do {\
  if (((val) < (lower_bound)) || ((val) > (upper_bound))) {\
    WelsLog(&(pCtx->sLogCtx), WELS_LOG_ERROR, "invalid syntax " syntax_name " %d", (val));\
    return (ret_code);\
  }\
}while(0)
#define WELS_CHECK_SE_LOWER_ERROR(val, lower_bound, syntax_name, ret_code) do {\
  if ((val) < (lower_bound)) {\
    WelsLog(&(pCtx->sLogCtx), WELS_LOG_ERROR, "invalid syntax " syntax_name " %d", (val));\
    return (ret_code);\
  }\
}while(0)
#define WELS_CHECK_SE_UPPER_ERROR(val, upper_bound, syntax_name, ret_code) do {\
  if ((val) > (upper_bound)) {\
    WelsLog(&(pCtx->sLogCtx), WELS_LOG_ERROR, "invalid syntax " syntax_name " %d", (val));\
    return (ret_code);\
  }\
}while(0)
// Silent error variants: when val is out of range, return ret_code from the enclosing function
// without logging. syntax_name is accepted for signature parity with the logging variants and is
// not used.
// Every macro argument is parenthesized so that compound expressions (e.g. `a & b`) compare as a
// whole instead of being regrouped by operator precedence.
#define WELS_CHECK_SE_BOTH_ERROR_NOLOG(val, lower_bound, upper_bound, syntax_name, ret_code) do {\
  if (((val) < (lower_bound)) || ((val) > (upper_bound))) {\
    return (ret_code);\
  }\
}while(0)
#define WELS_CHECK_SE_LOWER_ERROR_NOLOG(val, lower_bound, syntax_name, ret_code) do {\
  if ((val) < (lower_bound)) {\
    return (ret_code);\
  }\
}while(0)
#define WELS_CHECK_SE_UPPER_ERROR_NOLOG(val, upper_bound, syntax_name, ret_code) do {\
  if ((val) > (upper_bound)) {\
    return (ret_code);\
  }\
}while(0)
// Warning variants: when val is out of range, log it through WelsLog at WELS_LOG_WARNING and
// continue (no return). A variable named pCtx (with an sLogCtx member) must be in scope at the
// point of use, and syntax_name must be a string literal.
// Every macro argument is parenthesized so that compound expressions (e.g. `a & b`) compare as a
// whole instead of being regrouped by operator precedence.
#define WELS_CHECK_SE_BOTH_WARNING(val, lower_bound, upper_bound, syntax_name) do {\
  if (((val) < (lower_bound)) || ((val) > (upper_bound))) {\
    WelsLog(&(pCtx->sLogCtx), WELS_LOG_WARNING, "invalid syntax " syntax_name " %d", (val));\
  }\
}while(0)
#define WELS_CHECK_SE_LOWER_WARNING(val, lower_bound, syntax_name) do {\
  if ((val) < (lower_bound)) {\
    WelsLog(&(pCtx->sLogCtx), WELS_LOG_WARNING, "invalid syntax " syntax_name " %d", (val));\
  }\
}while(0)
#define WELS_CHECK_SE_UPPER_WARNING(val, upper_bound, syntax_name) do {\
  if ((val) > (upper_bound)) {\
    WelsLog(&(pCtx->sLogCtx), WELS_LOG_WARNING, "invalid syntax " syntax_name " %d", (val));\
  }\
}while(0)
// below define syntax element offset
// Each *_OFFSET is the constant named in the "_minusN" suffix of the corresponding syntax element.
// NOTE(review): presumably added back to the parsed value to recover the real quantity --
// confirm at the parsing sites.
// for bit_depth_luma_minus8 and bit_depth_chroma_minus8
#define BIT_DEPTH_LUMA_OFFSET 8
#define BIT_DEPTH_CHROMA_OFFSET 8
// for log2_max_frame_num_minus4
#define LOG2_MAX_FRAME_NUM_OFFSET 4
// for log2_max_pic_order_cnt_lsb_minus4
#define LOG2_MAX_PIC_ORDER_CNT_LSB_OFFSET 4
// for pic_width_in_mbs_minus1
#define PIC_WIDTH_IN_MBS_OFFSET 1
// for pic_height_in_map_units_minus1
#define PIC_HEIGHT_IN_MAP_UNITS_OFFSET 1
// for bit_depth_aux_minus8
#define BIT_DEPTH_AUX_OFFSET 8
// for num_slice_groups_minus1
#define NUM_SLICE_GROUPS_OFFSET 1
// for run_length_minus1
#define RUN_LENGTH_OFFSET 1
// for slice_group_change_rate_minus1
#define SLICE_GROUP_CHANGE_RATE_OFFSET 1
// for pic_size_in_map_units_minus1
#define PIC_SIZE_IN_MAP_UNITS_OFFSET 1
// for num_ref_idx_l0_default_active_minus1 and num_ref_idx_l1_default_active_minus1
#define NUM_REF_IDX_L0_DEFAULT_ACTIVE_OFFSET 1
#define NUM_REF_IDX_L1_DEFAULT_ACTIVE_OFFSET 1
// for pic_init_qp_minus26 and pic_init_qs_minus26
#define PIC_INIT_QP_OFFSET 26
#define PIC_INIT_QS_OFFSET 26
// for num_ref_idx_l0_active_minus1 and num_ref_idx_l1_active_minus1
#define NUM_REF_IDX_L0_ACTIVE_OFFSET 1
#define NUM_REF_IDX_L1_ACTIVE_OFFSET 1
// From Level 5.2
// NOTE(review): presumably the maximum frame size in macroblocks allowed by that level -- confirm against the H.264 level limits table.
#define MAX_MB_SIZE 36864
// for aspect_ratio_idc
#define EXTENDED_SAR 255
} // namespace WelsDec
#endif//WELS_EXPONENTIAL_GOLOMB_ENTROPY_CODING_H__

View File

@ -0,0 +1,78 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef WELS_DECODE_MB_AUX_H__
#define WELS_DECODE_MB_AUX_H__
#include "typedefs.h"
#include "macros.h"
namespace WelsDec {
// Residual reconstruction routines. All IdctResAddPred* variants share one parameter layout:
//   pPred    - prediction samples, updated in place (non-const pointer)
//   kiStride - stride of the pPred buffer
//   pRs      - residual coefficients
// NOTE(review): per the names these apply an inverse DCT to pRs and add the result to pPred
// (4x4 block by default, 8x8 for the *8x8* variant) -- confirm in decode_mb_aux.cpp.
void IdctResAddPred_c (uint8_t* pPred, const int32_t kiStride, int16_t* pRs);
void IdctResAddPred8x8_c (uint8_t* pPred, const int32_t kiStride, int16_t* pRs);
// Platform-specific variants are declared with C linkage and are only visible when the
// matching build macro (X86_ASM, HAVE_AVX2, HAVE_NEON, HAVE_NEON_AARCH64, HAVE_MMI) is defined.
#if defined(__cplusplus)
extern "C" {
#endif//__cplusplus
#if defined(X86_ASM)
void IdctResAddPred_mmx (uint8_t* pPred, const int32_t kiStride, int16_t* pRs);
void IdctResAddPred_sse2 (uint8_t* pPred, const int32_t kiStride, int16_t* pRs);
#if defined(HAVE_AVX2)
void IdctResAddPred_avx2 (uint8_t* pPred, const int32_t kiStride, int16_t* pRs);
// Takes an additional pNzc pointer (NOTE(review): presumably per-block non-zero counts used to skip empty blocks -- confirm).
void IdctFourResAddPred_avx2 (uint8_t* pPred, int32_t iStride, int16_t* pRs, const int8_t* pNzc);
#endif
#endif//X86_ASM
#if defined(HAVE_NEON)
void IdctResAddPred_neon (uint8_t* pred, const int32_t stride, int16_t* rs);
#endif
#if defined(HAVE_NEON_AARCH64)
void IdctResAddPred_AArch64_neon (uint8_t* pred, const int32_t stride, int16_t* rs);
#endif
#if defined(HAVE_MMI)
void IdctResAddPred_mmi (uint8_t* pPred, const int32_t kiStride, int16_t* pRs);
#endif//HAVE_MMI
#if defined(__cplusplus)
}
#endif//__cplusplus
// Fills pBlockOffset from the luma (kiYStride) and chroma (kiUVStride) strides.
// NOTE(review): the required size of pBlockOffset is not visible from this header -- confirm at the definition.
void GetI4LumaIChromaAddrTable (int32_t* pBlockOffset, const int32_t kiYStride, const int32_t kiUVStride);
} // namespace WelsDec
#endif//WELS_DECODE_MB_AUX_H__

View File

@ -0,0 +1,104 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef WELS_DECODE_SLICE_H__
#define WELS_DECODE_SLICE_H__
#include "decoder_context.h"
namespace WelsDec {
// Macroblock/slice decoding entry points.
// NOTE(review): the int32_t results are presumably ERR_* status codes (0 on success) -- confirm in decode_slice.cpp.
// CAVLC macroblock decoding, one pair of routines per slice type (I, P, B).
int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx);
int32_t WelsDecodeMbCavlcISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag);
int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx);
int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag);
int32_t WelsActualDecodeMbCavlcBSlice (PWelsDecoderContext pCtx);
int32_t WelsDecodeMbCavlcBSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag);
// Common signature of the per-macroblock decoders that take (pCtx, pNalCur, uiEosFlag).
typedef int32_t (*PWelsDecMbFunc) (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag);
// CABAC macroblock decoding, per slice type; uiEosFlag is an output passed by reference.
int32_t WelsDecodeMbCabacISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag);
int32_t WelsDecodeMbCabacPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag);
int32_t WelsDecodeMbCabacBSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag);
int32_t WelsDecodeMbCabacISliceBaseMode0 (PWelsDecoderContext pCtx, uint32_t& uiEosFlag);
int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiEosFlag);
int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiEosFlag);
// Slice-level decoding and reconstruction.
int32_t WelsTargetSliceConstruction (PWelsDecoderContext pCtx); //construction based on slice
int32_t WelsDecodeSlice (PWelsDecoderContext pCtx, bool bFirstSliceInLayer, PNalUnit pNalCur);
int32_t WelsDecodeAndConstructSlice (PWelsDecoderContext pCtx);
// Macroblock-level reconstruction.
int32_t WelsTargetMbConstruction (PWelsDecoderContext pCtx);
int32_t WelsMbIntraPredictionConstruction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer, bool bOutput);
// pDstY/pDstU/pDstV are the destination planes; iStrideL and iStrideC their luma and chroma strides.
int32_t WelsMbInterSampleConstruction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer,
uint8_t* pDstY, uint8_t* pDstU, uint8_t* pDstV, int32_t iStrideL, int32_t iStrideC);
int32_t WelsMbInterConstruction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer);
void WelsLumaDcDequantIdct (int16_t* pBlock, int32_t iQp, PWelsDecoderContext pCtx);
int32_t WelsMbInterPrediction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer);
void WelsChromaDcIdct (int16_t* pBlock);
bool ComputeColocatedTemporalScaling (PWelsDecoderContext pCtx);
// Block-zeroing routines: (block, stride) describe a buffer of int16_t coefficients.
// NOTE(review): per the names these clear a 16x16 / 8x8 block -- confirm in decode_slice.cpp.
// Platform-specific variants are declared with C linkage and are only visible when the
// matching build macro (X86_ASM, HAVE_NEON, HAVE_NEON_AARCH64) is defined.
#ifdef __cplusplus
extern "C" {
#endif//__cplusplus
#if defined(X86_ASM)
void WelsBlockZero16x16_sse2 (int16_t* block, int32_t stride);
void WelsBlockZero8x8_sse2 (int16_t* block, int32_t stride);
#endif
#if defined(HAVE_NEON)
void WelsBlockZero16x16_neon (int16_t* block, int32_t stride);
void WelsBlockZero8x8_neon (int16_t* block, int32_t stride);
#endif
#if defined(HAVE_NEON_AARCH64)
void WelsBlockZero16x16_AArch64_neon (int16_t* block, int32_t stride);
void WelsBlockZero8x8_AArch64_neon (int16_t* block, int32_t stride);
#endif
#ifdef __cplusplus
}
#endif//__cplusplus
// Initializes the SBlockFunc function table for the given cpu indication iCpu
// (NOTE(review): presumably selects between the _c and platform-specific variants -- confirm).
void WelsBlockFuncInit (SBlockFunc* pFunc, int32_t iCpu);
// Portable C variants of the block-zeroing routines.
void WelsBlockZero16x16_c (int16_t* block, int32_t stride);
void WelsBlockZero8x8_c (int16_t* block, int32_t stride);
} // namespace WelsDec
#endif //WELS_DECODE_SLICE_H__

View File

@ -0,0 +1,186 @@
/*!
* \copy
* Copyright (c) 2009-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file decoder.h
*
* \brief Interfaces introduced in decoder system architecture
*
* \date 03/10/2009 Created
*
*************************************************************************************
*/
#ifndef WELS_DECODER_SYSTEM_ARCHITECTURE_H__
#define WELS_DECODER_SYSTEM_ARCHITECTURE_H__
#include "typedefs.h"
#include "decoder_context.h"
namespace WelsDec {
#ifdef __cplusplus
extern "C" {
#endif//__cplusplus
/*!
* \brief configure decoder parameters
* \param pCtx decoder context to configure
* \param kpParam decoding parameters (read-only)
* \return int32_t status (presumably 0 on success -- confirm in decoder.cpp)
*/
int32_t DecoderConfigParam (PWelsDecoderContext pCtx, const SDecodingParam* kpParam);
/*!
* \brief fill in default values of decoder context
* \param pCtx decoder context to fill
* \param pLogCtx log context
*/
void WelsDecoderDefaults (PWelsDecoderContext pCtx, SLogContext* pLogCtx);
/*!
* \brief fill last decoded picture info
* \param sLastDecPicInfo structure to fill, passed by reference
*/
void WelsDecoderLastDecPicInfoDefaults (SWelsLastDecPicInfo& sLastDecPicInfo);
/*!
* \brief fill data fields in SPS and PPS default for decoder context
* \param sSpsPpsCtx structure to fill, passed by reference
*/
void WelsDecoderSpsPpsDefaults (SWelsDecoderSpsPpsCTX& sSpsPpsCtx);
/*!
* \brief copy SpsPps from one Ctx to another ctx for threaded code
* \param pFromCtx source decoder context
* \param pToCtx destination decoder context
*/
void CopySpsPps (PWelsDecoderContext pFromCtx, PWelsDecoderContext pToCtx);
/*!
*************************************************************************************
* \brief Initialize Wels decoder parameters and memory
*
* \param pCtx input context to be initialized at first stage
* \param pLogCtx log info pointer
*
* \return 0 - succeeded
* \return 1 - failed
*
* \note N/A
*************************************************************************************
*/
int32_t WelsInitDecoder (PWelsDecoderContext pCtx, SLogContext* pLogCtx);
/*!
*************************************************************************************
* \brief Uninitialize Wels decoder parameters and memory
*
* \param pCtx input context to be uninitialized at release stage
*
* \return NONE
*
* \note N/A
*************************************************************************************
*/
void WelsEndDecoder (PWelsDecoderContext pCtx);
/*!
*************************************************************************************
* \brief First entrance to decoding core interface.
*
* \param pCtx decoder context
* \param kpBsBuf bit streaming buffer
* \param kiBsLen size in bytes length of bit streaming buffer input
* \param ppDst picture payload data to be output
* \param pDstBufInfo buf information of output data
* \param pDstBsInfo parser bit stream information (SParserBsInfo)
*        (NOTE(review): presumably an output used when only parsing -- confirm in decoder.cpp)
*
* \return 0 - succeeded
* \return 1 - failed
*
* \note N/A
*************************************************************************************
*/
int32_t WelsDecodeBs (PWelsDecoderContext pCtx, const uint8_t* kpBsBuf, const int32_t kiBsLen,
uint8_t** ppDst, SBufferInfo* pDstBufInfo, SParserBsInfo* pDstBsInfo);
/*
* request memory blocks for decoder avc part
* kiMbWidth / kiMbHeight give the picture size in macroblocks; bReallocFlag is an output passed
* by reference (NOTE(review): presumably set when buffers had to be reallocated -- confirm).
*/
int32_t WelsRequestMem (PWelsDecoderContext pCtx, const int32_t kiMbWidth, const int32_t kiMbHeight,
bool& bReallocFlag);
/*
* free memory dynamically allocated during decoder
*/
void WelsFreeDynamicMemory (PWelsDecoderContext pCtx);
/*!
* \brief Keep the picture resolution (obtained from the slice header) synchronized among the different parts
* of the decoder internals (i.e. memory related parts and so on)
* ( MB coordinate and parts of data within decoder context structure )
* \param pCtx Wels decoder context
* \param kiMbWidth MB width
* \param kiMbHeight MB height
* \return 0 - successful; non-zero - something wrong
*/
int32_t SyncPictureResolutionExt (PWelsDecoderContext pCtx, const int32_t kiMbWidth, const int32_t kiMbHeight);
/*!
* \brief Initialize the decoder's prediction function pointers, including ASM functions, used during MB reconstruction
* \param pCtx Wels decoder context
* \param uiCpuFlag cpu assembly indication
*/
void InitPredFunc (PWelsDecoderContext pCtx, uint32_t uiCpuFlag);
/*!
* \brief Initialize the decoder's internal function pointers, including ASM functions
* \param pCtx Wels decoder context
* \param uiCpuFlag cpu assembly indication
*/
void InitDecFuncs (PWelsDecoderContext pCtx, uint32_t uiCpuFlag);
void GetVclNalTemporalId (PWelsDecoderContext pCtx); // report whether the current AU contains a VCL NAL,
// and if so, get its temporal ID
// Reset the number-related decoder statistics info
void ResetDecStatNums (SDecoderStatistics* pDecStat);
// Update information when freezing occurs, including the IDR/non-IDR numbers
void UpdateDecStatFreezingInfo (const bool kbIdrFlag, SDecoderStatistics* pDecStat);
// Update information when no freezing occurs, including QP, correct IDR number, ECed IDR number
void UpdateDecStatNoFreezingInfo (PWelsDecoderContext pCtx);
// Update decoder statistics information
void UpdateDecStat (PWelsDecoderContext pCtx, const bool kbOutput);
// Destroy picture buffer
void DestroyPicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, CMemoryAlign* pMa);
// Reset the picture reordering buffer list
void ResetReorderingPictureBuffers (PPictReoderingStatus pPictReoderingStatus, PPictInfo pPictInfo,
const bool& bFullReset);
#ifdef __cplusplus
}
#endif//__cplusplus
} // namespace WelsDec
#endif//WELS_DECODER_SYSTEM_ARCHITECTURE_H__

View File

@ -0,0 +1,587 @@
/*!
* \copy
* Copyright (c) 2009-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file decoder_context.h
*
* \brief mainly interface introduced in Wels decoder side
*
* \date 3/4/2009 Created
*
*************************************************************************************
*/
#ifndef WELS_DECODER_FRAMEWORK_H__
#define WELS_DECODER_FRAMEWORK_H__
#include "typedefs.h"
#include "utils.h"
#include "wels_const.h"
#include "wels_common_basis.h"
#include "wels_common_defs.h"
#include "codec_app_def.h"
#include "parameter_sets.h"
#include "nalu.h"
#include "dec_frame.h"
#include "pic_queue.h"
#include "vlc_decoder.h"
#include "fmo.h"
#include "crt_util_safe_x.h"
#include "mb_cache.h"
#include "expand_pic.h"
#include "mc.h"
#include "memory_align.h"
#include "wels_decoder_thread.h"
namespace WelsDec {
// Maximum prediction mode IDs for intra 16x16 luma, chroma and intra 4x4 luma prediction.
#define MAX_PRED_MODE_ID_I16x16 3
#define MAX_PRED_MODE_ID_CHROMA 3
#define MAX_PRED_MODE_ID_I4x4 8
// Maximum QP value; tables indexed by QP are declared with WELS_QP_MAX + 1 entries.
#define WELS_QP_MAX 51
// Enables the members and code guarded by #ifdef LONG_TERM_REF.
#define LONG_TERM_REF
// Parenthesized so the macro always expands as one operand, whatever the surrounding expression.
// NOTE(review): the value is -(2^31 - 1), i.e. one greater than INT32_MIN -- confirm this is intended.
#define IMinInt32 (-0x7FFFFFFF)
/*
 * One CABAC context model entry: two byte-sized fields.
 * NOTE(review): presumably uiState is the probability state index and uiMPS the
 * most probable symbol -- confirm against the CABAC decoding code.
 */
typedef struct SWels_Cabac_Element {
  uint8_t uiState;
  uint8_t uiMPS;
} SWelsCabacCtx;
typedef SWelsCabacCtx* PWelsCabacCtx; // pointer alias
/*
 * State of the CABAC decoding engine: two 64-bit working values, a bit counter
 * and three pointers into the bitstream buffer (start / current / end).
 */
typedef struct {
  uint64_t uiRange;
  uint64_t uiOffset;
  int32_t  iBitsLeft;
  uint8_t* pBuffStart; // start of the buffer being decoded
  uint8_t* pBuffCurr;  // current read position
  uint8_t* pBuffEnd;   // end of the buffer
} SWelsCabacDecEngine;
typedef SWelsCabacDecEngine* PWelsCabacDecEngine; // pointer alias
// Offsets of the per-syntax-element context groups.
// NOTE(review): presumably indices into the CABAC context arrays (WELS_CONTEXT_COUNT entries) -- confirm.
#define NEW_CTX_OFFSET_MB_TYPE_I 3
#define NEW_CTX_OFFSET_SKIP 11
#define NEW_CTX_OFFSET_SUBMB_TYPE 21
#define NEW_CTX_OFFSET_B_SUBMB_TYPE 36
#define NEW_CTX_OFFSET_MVD 40
#define NEW_CTX_OFFSET_REF_NO 54
#define NEW_CTX_OFFSET_DELTA_QP 60
#define NEW_CTX_OFFSET_IPR 68
#define NEW_CTX_OFFSET_CIPR 64
#define NEW_CTX_OFFSET_CBP 73
#define NEW_CTX_OFFSET_CBF 85
#define NEW_CTX_OFFSET_MAP 105
#define NEW_CTX_OFFSET_LAST 166
#define NEW_CTX_OFFSET_ONE 227
#define NEW_CTX_OFFSET_ABS 232
// Same value as NEW_CTX_OFFSET_TRANSFORM_SIZE_8X8_FLAG below.
#define NEW_CTX_OFFSET_TS_8x8_FLAG 399
// Number of contexts used for MVD and CBP.
#define CTX_NUM_MVD 7
#define CTX_NUM_CBP 4
// Table 9-34 in Page 270 (presumably of the H.264 specification -- TODO confirm edition)
#define NEW_CTX_OFFSET_TRANSFORM_SIZE_8X8_FLAG 399
#define NEW_CTX_OFFSET_MAP_8x8 402
#define NEW_CTX_OFFSET_LAST_8x8 417
#define NEW_CTX_OFFSET_ONE_8x8 426
#define NEW_CTX_OFFSET_ABS_8x8 431 // NOTE(review): original author's open question: where is this value defined?
/*
 * Byte buffer described by four pointers: the buffer bounds (head / end)
 * plus a start position and a current position inside it.
 */
struct TagDataBuffer {
  uint8_t* pHead;     // first byte of the buffer
  uint8_t* pEnd;      // end of the buffer
  uint8_t* pStartPos; // start position
  uint8_t* pCurPos;   // current position
};
typedef struct TagDataBuffer SDataBuffer;
// Maximum permitted size of a stored SPS / PPS bitstream, used for parse-only mode
#define SPS_PPS_BS_SIZE 128
// Stored bitstream of one SPS (parse-only mode)
typedef struct TagSpsBsInfo {
uint8_t pSpsBsBuf [SPS_PPS_BS_SIZE]; // SPS bitstream bytes
int32_t iSpsId; // id of the SPS
uint16_t uiSpsBsLen; // length of the data held in pSpsBsBuf
} SSpsBsInfo;
// Stored bitstream of one PPS (parse-only mode)
typedef struct TagPpsBsInfo {
uint8_t pPpsBsBuf [SPS_PPS_BS_SIZE]; // PPS bitstream bytes
int32_t iPpsId; // id of the PPS
uint16_t uiPpsBsLen; // length of the data held in pPpsBsBuf
} SPpsBsInfo;
//#ifdef __cplusplus
//extern "C" {
//#endif//__cplusplus
/*
* TODO: move the function pointer structures below to a separate module/file later
*/
//typedef int32_t (*rec_mb) (Mb *cur_mb, PWelsDecoderContext pCtx);
/* Intra predictor function pointer: predicts in place into pPred, using stride kiLumaStride */
typedef void (*PGetIntraPredFunc) (uint8_t* pPred, const int32_t kiLumaStride);
/* IDCT + add residual pRs onto the prediction pPred */
typedef void (*PIdctResAddPredFunc) (uint8_t* pPred, const int32_t kiStride, int16_t* pRs);
/* Same as above for four blocks at once; pNzc carries the non-zero counts (presumably used to skip empty blocks -- TODO confirm) */
typedef void (*PIdctFourResAddPredFunc) (uint8_t* pPred, int32_t iStride, int16_t* pRs, const int8_t* pNzc);
/* Picture border expansion function pointer */
typedef void (*PExpandPictureFunc) (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicWidth,
const int32_t kiPicHeight);
/* Intra 8x8 predictor function pointer; bTLAvail / bTRAvail give the top-left / top-right availability */
typedef void (*PGetIntraPred8x8Func) (uint8_t* pPred, const int32_t kiLumaStride, bool bTLAvail, bool bTRAvail);
/* Reference picture lists and their element counts, one set per list (LIST_A lists) */
typedef struct TagRefPic {
PPicture pRefList[LIST_A][MAX_DPB_COUNT]; // reference picture marking plus FIFO scheme
PPicture pShortRefList[LIST_A][MAX_DPB_COUNT]; // short-term reference pictures
PPicture pLongRefList[LIST_A][MAX_DPB_COUNT]; // long-term reference pictures
uint8_t uiRefCount[LIST_A]; // number of entries in pRefList
uint8_t uiShortRefCount[LIST_A]; // number of entries in pShortRefList
uint8_t uiLongRefCount[LIST_A]; // number of entries in pLongRefList; depends on the ref pic module
int32_t iMaxLongTermFrameIdx;
} SRefPic, *PRefPic;
// Block copy function pointer: copies from pSrc (stride iStrideS) to pDst (stride iStrideD)
typedef void (*PCopyFunc) (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
// Copy functions for luma and chroma
typedef struct TagCopyFunc {
PCopyFunc pCopyLumaFunc;
PCopyFunc pCopyChromaFunc;
} SCopyFunc;
// Deblocking module definitions
struct TagDeblockingFunc;
// Per-picture / per-slice state handed to the deblocking filter
typedef struct tagDeblockingFilter {
uint8_t* pCsData[3]; // pointer to reconstructed picture data
int32_t iCsStride[2]; // Cs stride
EWelsSliceType eSliceType;
int8_t iSliceAlphaC0Offset;
int8_t iSliceBetaOffset;
int8_t iChromaQP[2];
int8_t iLumaQP;
struct TagDeblockingFunc* pLoopf; // deblocking function table
PPicture* pRefPics[LIST_A];
} SDeblockingFilter, *PDeblockingFilter;
// Function pointer types of the deblocking filter.
// LT4 variants take a tc array (iTc); EQ4 variants do not.
// The "...Func" chroma variants take separate Cb and Cr pointers, the "...Func2" variants a single pointer.
typedef void (*PDeblockingFilterMbFunc) (PDqLayer pCurDqLayer, PDeblockingFilter filter, int32_t boundry_flag);
typedef void (*PLumaDeblockingLT4Func) (uint8_t* iSampleY, int32_t iStride, int32_t iAlpha, int32_t iBeta,
int8_t* iTc);
typedef void (*PLumaDeblockingEQ4Func) (uint8_t* iSampleY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
typedef void (*PChromaDeblockingLT4Func) (uint8_t* iSampleCb, uint8_t* iSampleCr, int32_t iStride, int32_t iAlpha,
int32_t iBeta, int8_t* iTc);
typedef void (*PChromaDeblockingEQ4Func) (uint8_t* iSampleCb, uint8_t* iSampleCr, int32_t iStride, int32_t iAlpha,
int32_t iBeta);
typedef void (*PChromaDeblockingLT4Func2) (uint8_t* iSampleCbr, int32_t iStride, int32_t iAlpha,
int32_t iBeta, int8_t* iTc);
typedef void (*PChromaDeblockingEQ4Func2) (uint8_t* iSampleCbr, int32_t iStride, int32_t iAlpha,
int32_t iBeta);
// Table of deblocking functions: luma and chroma, LT4 / EQ4, in "Ver" and "Hor" variants
typedef struct TagDeblockingFunc {
PLumaDeblockingLT4Func pfLumaDeblockingLT4Ver;
PLumaDeblockingEQ4Func pfLumaDeblockingEQ4Ver;
PLumaDeblockingLT4Func pfLumaDeblockingLT4Hor;
PLumaDeblockingEQ4Func pfLumaDeblockingEQ4Hor;
PChromaDeblockingLT4Func pfChromaDeblockingLT4Ver;
PChromaDeblockingEQ4Func pfChromaDeblockingEQ4Ver;
PChromaDeblockingLT4Func pfChromaDeblockingLT4Hor;
PChromaDeblockingEQ4Func pfChromaDeblockingEQ4Hor;
PChromaDeblockingLT4Func2 pfChromaDeblockingLT4Ver2;
PChromaDeblockingEQ4Func2 pfChromaDeblockingEQ4Ver2;
PChromaDeblockingLT4Func2 pfChromaDeblockingLT4Hor2;
PChromaDeblockingEQ4Func2 pfChromaDeblockingEQ4Hor2;
} SDeblockingFunc, *PDeblockingFunc;
// Operates on a non-zero count array
typedef void (*PWelsNonZeroCountFunc) (int8_t* pNonZeroCount);
// Operates on a coefficient block with the given stride (presumably zeroes it, per the name -- TODO confirm)
typedef void (*PWelsBlockZeroFunc) (int16_t* block, int32_t stride);
// Block-level helper function table
typedef struct TagBlockFunc {
PWelsNonZeroCountFunc pWelsSetNonZeroCountFunc;
PWelsBlockZeroFunc pWelsBlockZero16x16Func;
PWelsBlockZeroFunc pWelsBlockZero8x8Func;
} SBlockFunc;
// Function pointer types working on neighbor availability (PWelsNeighAvail).
// Fills the non-zero count / intra prediction mode caches from the neighboring MBs of the current DQ layer
typedef void (*PWelsFillNeighborMbInfoIntra4x4Func) (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
int8_t* pIntraPredMode, PDqLayer pCurDqLayer);
// Maps neighbor availability to sample availability
typedef void (*PWelsMapNeighToSample) (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail);
typedef void (*PWelsMap16NeighToSample) (PWelsNeighAvail pNeighAvail, uint8_t* pSampleAvail);
// Intra 4x4 / 16x16 prediction mode parsing from the bitstream pBs; returns an int32_t status
typedef int32_t (*PWelsParseIntra4x4ModeFunc) (PWelsNeighAvail pNeighAvail, int8_t* pIntraPredMode, PBitStringAux pBs,
PDqLayer pCurDqLayer);
typedef int32_t (*PWelsParseIntra16x16ModeFunc) (PWelsNeighAvail pNeighAvail, PBitStringAux pBs, PDqLayer pCurDqLayer);
// Bit flags naming which kind of parameter set has been overwritten; each flag occupies
// its own bit so that several can be combined in one integer.
enum {
  OVERWRITE_NONE      = 0x0,
  OVERWRITE_PPS       = 0x1,
  OVERWRITE_SPS       = 0x2,
  OVERWRITE_SUBSETSPS = 0x4
};
// Decoder-global SPS and PPS context
typedef struct tagWelsWelsDecoderSpsPpsCTX {
SPosOffset sFrameCrop;
// Parameter set storage; each buffer has one entry more than the matching MAX_*_COUNT
SSps sSpsBuffer[MAX_SPS_COUNT + 1];
SPps sPpsBuffer[MAX_PPS_COUNT + 1];
SSubsetSps sSubsetSpsBuffer[MAX_SPS_COUNT + 1];
SNalUnit sPrefixNal;
PSps pActiveLayerSps[MAX_LAYER_NUM]; // active SPS of each layer
bool bAvcBasedFlag; // For decoding bitstream:
// for EC parameter sets
bool bSpsExistAheadFlag; // whether an SPS NAL exists ahead of the sequence
bool bSubspsExistAheadFlag;// whether a Subset SPS NAL exists ahead of the sequence
bool bPpsExistAheadFlag; // whether a PPS NAL exists ahead of the sequence
int32_t iSpsErrorIgnored;
int32_t iSubSpsErrorIgnored;
int32_t iPpsErrorIgnored;
// Per-id availability flags of the stored parameter sets
bool bSpsAvailFlags[MAX_SPS_COUNT];
bool bSubspsAvailFlags[MAX_SPS_COUNT];
bool bPpsAvailFlags[MAX_PPS_COUNT];
// Last invalid id and number of invalid parameter sets, per parameter set kind
int32_t iPPSLastInvalidId;
int32_t iPPSInvalidNum;
int32_t iSPSLastInvalidId;
int32_t iSPSInvalidNum;
int32_t iSubSPSLastInvalidId;
int32_t iSubSPSInvalidNum;
int32_t iSeqId; //sequence id
int iOverwriteFlags; // presumably a combination of the OVERWRITE_* flags -- TODO confirm
} SWelsDecoderSpsPpsCTX, *PWelsDecoderSpsPpsCTX;
// Information about the last decoded picture
typedef struct tagSWelsLastDecPicInfo {
// Save the last nal header info
SNalUnitHeaderExt sLastNalHdrExt;
SSliceHeader sLastSliceHeader;
int32_t iPrevPicOrderCntMsb;
int32_t iPrevPicOrderCntLsb;
PPicture pPreviousDecodedPictureInDpb; //pointer to previously decoded picture in DPB for error concealment
int32_t iPrevFrameNum;// frame number of the previous well-decoded frame (non-truncated mode)
bool bLastHasMmco5;
uint32_t uiDecodingTimeStamp; // relative decoding time stamp
} SWelsLastDecPicInfo, *PWelsLastDecPicInfo;
// Information about one picture held in the picture reordering list
typedef struct tagPictInfo {
SBufferInfo sBufferInfo; // buffer info of the picture
int32_t iPOC; // picture order count
int32_t iPicBuffIdx; // picture buffer index
uint32_t uiDecodingTimeStamp; // relative decoding time stamp
bool bLastGOP; // presumably set when the picture belongs to the previous GOP -- TODO confirm
} SPictInfo, *PPictInfo;
/*
 * Bookkeeping for the picture reordering buffer: six signed 32-bit values
 * (picture-info index, minimum POC, picture counts, last written POC,
 * largest buffered picture index).
 */
struct tagPictReoderingStatus {
  int32_t iPictInfoIndex;
  int32_t iMinPOC;
  int32_t iNumOfPicts;
  int32_t iLastGOPRemainPicts;
  int32_t iLastWrittenPOC;
  int32_t iLargestBufferedPicIndex;
};
typedef struct tagPictReoderingStatus  SPictReoderingStatus;
typedef struct tagPictReoderingStatus* PPictReoderingStatus;
/*
* SWelsDecoderContext: maintains the data of all modules across the decoder framework
*/
typedef struct TagWelsDecoderContext {
SLogContext sLogCtx;
// Input
void*
pArgDec; // structured arguments for decoder, reserved here for extension in the future
SDataBuffer sRawData;
SDataBuffer sSavedData; //for parse only purpose
// Configuration
SDecodingParam* pParam;
uint32_t uiCpuFlag; // CPU compatibility detected
VIDEO_BITSTREAM_TYPE eVideoType; //indicate the type of video to decide whether or not to do qp_delta error detection.
bool bHaveGotMemory; // global memory for decoder context related ever requested?
int32_t iImgWidthInPixel; // width of image in pixel reconstruction picture to be output
int32_t iImgHeightInPixel;// height of image in pixel reconstruction picture to be output
int32_t
iLastImgWidthInPixel; // width of image in last successful pixel reconstruction picture to be output
int32_t
iLastImgHeightInPixel;// height of image in last successful pixel reconstruction picture to be output
bool bFreezeOutput; // indicating current frame freezing. Default: true
// Derived common elements
SNalUnitHeader sCurNalHead;
EWelsSliceType eSliceType; // Slice type
bool bUsedAsRef; //flag as ref
int32_t iFrameNum;
int32_t iErrorCode; // error code return while decoding in case packets lost
SFmo sFmoList[MAX_PPS_COUNT]; // list for FMO storage
PFmo pFmo; // current fmo context after parsed slice_header
int32_t iActiveFmoNum; // active count number of fmo context in list
/*needed info by decode slice level and mb level*/
int32_t
iDecBlockOffsetArray[24]; // address table for sub 4x4 blocks in intra4x4_mb, so the address need not be calculated every time.
// Per-macroblock data, one pointer per exchangeable layer (LAYER_NUM_EXCHANGEABLE)
struct {
uint32_t* pMbType[LAYER_NUM_EXCHANGEABLE]; /* mb type */
int16_t (*pMv[LAYER_NUM_EXCHANGEABLE][LIST_A])[MB_BLOCK4x4_NUM][MV_A]; //[LAYER_NUM_EXCHANGEABLE MB_BLOCK4x4_NUM*]
int8_t (*pRefIndex[LAYER_NUM_EXCHANGEABLE][LIST_A])[MB_BLOCK4x4_NUM];
int8_t (*pDirect[LAYER_NUM_EXCHANGEABLE])[MB_BLOCK4x4_NUM];
bool* pNoSubMbPartSizeLessThan8x8Flag[LAYER_NUM_EXCHANGEABLE];
bool* pTransformSize8x8Flag[LAYER_NUM_EXCHANGEABLE];
int8_t* pLumaQp[LAYER_NUM_EXCHANGEABLE]; /*mb luma_qp*/
int8_t (*pChromaQp[LAYER_NUM_EXCHANGEABLE])[2]; /*mb chroma_qp*/
int16_t (*pMvd[LAYER_NUM_EXCHANGEABLE][LIST_A])[MB_BLOCK4x4_NUM][MV_A]; //[LAYER_NUM_EXCHANGEABLE MB_BLOCK4x4_NUM*]
uint16_t* pCbfDc[LAYER_NUM_EXCHANGEABLE];
int8_t (*pNzc[LAYER_NUM_EXCHANGEABLE])[24];
int8_t (*pNzcRs[LAYER_NUM_EXCHANGEABLE])[24];
int16_t (*pScaledTCoeff[LAYER_NUM_EXCHANGEABLE])[MB_COEFF_LIST_SIZE]; /*need be aligned*/
int8_t (*pIntraPredMode[LAYER_NUM_EXCHANGEABLE])[8]; //0~3 top4x4 ; 4~6 left 4x4; 7 intra16x16
int8_t (*pIntra4x4FinalMode[LAYER_NUM_EXCHANGEABLE])[MB_BLOCK4x4_NUM];
uint8_t* pIntraNxNAvailFlag[LAYER_NUM_EXCHANGEABLE];
int8_t* pChromaPredMode[LAYER_NUM_EXCHANGEABLE];
int8_t* pCbp[LAYER_NUM_EXCHANGEABLE];
uint8_t (*pMotionPredFlag[LAYER_NUM_EXCHANGEABLE][LIST_A])[MB_PARTITION_SIZE]; // 8x8
uint32_t (*pSubMbType[LAYER_NUM_EXCHANGEABLE])[MB_SUB_PARTITION_SIZE];
int32_t* pSliceIdc[LAYER_NUM_EXCHANGEABLE]; // using int32_t for slice_idc
int8_t* pResidualPredFlag[LAYER_NUM_EXCHANGEABLE];
int8_t* pInterPredictionDoneFlag[LAYER_NUM_EXCHANGEABLE];
bool* pMbCorrectlyDecodedFlag[LAYER_NUM_EXCHANGEABLE];
bool* pMbRefConcealedFlag[LAYER_NUM_EXCHANGEABLE];
uint32_t iMbWidth;
uint32_t iMbHeight;
} sMb;
// reconstruction picture
PPicture pDec; //pointer to current picture being reconstructed
PPicture
pTempDec; //pointer to temp decoder picture to be used only for Bi Prediction.
// reference pictures
SRefPic sRefPic;
SRefPic sTmpRefPic; //used to temporarily save RefPic for next active thread
SVlcTable* pVlcTable; // vlc table
SBitStringAux sBs;
int32_t iMaxBsBufferSizeInByte; //actual memory size for BS buffer
/* Global memory external */
SWelsDecoderSpsPpsCTX sSpsPpsCtx;
bool bHasNewSps;
SPosOffset sFrameCrop;
PSliceHeader pSliceHeader;
PPicBuff pPicBuff; // Initially allocated memory for pictures which are used in decoding.
int32_t iPicQueueNumber;
PAccessUnit pAccessUnitList; // current access unit list to be performed
//PSps pActiveLayerSps[MAX_LAYER_NUM];
PSps pSps; // used by current AU
PPps pPps; // used by current AU
// Memory for pAccessUnitList is dynamically held till decoder destruction.
PDqLayer
pCurDqLayer; // current DQ layer representation, also carry reference base layer if applicable
PDqLayer pDqLayersList[LAYER_NUM_EXCHANGEABLE]; // DQ layers list with memory allocated
PNalUnit pNalCur; // points to the current NAL unit
uint8_t uiNalRefIdc; // NalRefIdc for easy access;
int32_t iPicWidthReq; // picture width have requested the memory
int32_t iPicHeightReq; // picture height have requested the memory
uint8_t uiTargetDqId; // maximal DQ ID in current access unit, meaning target layer ID
//bool bAvcBasedFlag; // For decoding bitstream:
bool bEndOfStreamFlag; // Flag on end of stream requested by external application layer
bool bInstantDecFlag; // Flag for no-delay decoding
bool bInitialDqLayersMem; // dq layers related memory is available?
bool bOnlyOneLayerInCurAuFlag; //only one layer in current AU: 1
bool bReferenceLostAtT0Flag;
int32_t iTotalNumMbRec; //record current number of decoded MB
#ifdef LONG_TERM_REF
bool bParamSetsLostFlag; //sps or pps do not exist or not correct
bool
bCurAuContainLtrMarkSeFlag; //current AU has the LTR marking syntax element, mark the previous frame or self
int32_t iFrameNumOfAuMarkedLtr; //if bCurAuContainLtrMarkSeFlag==true, SHOULD set this variable
uint16_t uiCurIdrPicId;
#endif
bool bNewSeqBegin;
bool bNextNewSeqBegin;
//for Parse only
bool bFramePending;
bool bFrameFinish;
int32_t iNalNum;
int32_t iMaxNalNum; //permitted max NAL num stored in parser
SSpsBsInfo sSpsBsInfo [MAX_SPS_COUNT];
// NOTE(review): sized with MAX_PPS_COUNT although it stores subset SPS info -- confirm this is intentional
SSpsBsInfo sSubsetSpsBsInfo [MAX_PPS_COUNT];
SPpsBsInfo sPpsBsInfo [MAX_PPS_COUNT];
SParserBsInfo* pParserBsInfo;
//PPicture pPreviousDecodedPictureInDpb; //pointer to previously decoded picture in DPB for error concealment
PGetIntraPredFunc pGetI16x16LumaPredFunc[7]; //h264_predict_copy_16x16;
PGetIntraPredFunc pGetI4x4LumaPredFunc[14]; // h264_predict_4x4_t
PGetIntraPredFunc pGetIChromaPredFunc[7]; // h264_predict_8x8_t
PIdctResAddPredFunc pIdctResAddPredFunc;
PIdctFourResAddPredFunc pIdctFourResAddPredFunc;
SMcFunc sMcFunc;
//Transform8x8
PGetIntraPred8x8Func pGetI8x8LumaPredFunc[14];
PIdctResAddPredFunc pIdctResAddPredFunc8x8;
//For error concealment
SCopyFunc sCopyFunc;
/* For Deblocking */
SDeblockingFunc sDeblockingFunc;
SExpandPicFunc sExpandPicFunc;
/* For Block */
SBlockFunc sBlockFunc;
int32_t iCurSeqIntervalTargetDependId;
int32_t iCurSeqIntervalMaxPicWidth;
int32_t iCurSeqIntervalMaxPicHeight;
PWelsFillNeighborMbInfoIntra4x4Func pFillInfoCacheIntraNxNFunc;
PWelsMapNeighToSample pMapNxNNeighToSampleFunc;
PWelsMap16NeighToSample pMap16x16NeighToSampleFunc;
//feedback whether or not have VCL in current AU, and the temporal ID
int32_t iFeedbackVclNalInAu;
int32_t iFeedbackTidInAu;
int32_t iFeedbackNalRefIdc;
bool bAuReadyFlag; // true: one au is ready for decoding; false: default value
bool bPrintFrameErrorTraceFlag; //true: can print info for upper layer
int32_t iIgnoredErrorInfoPacketCount; //store the packet number with error decoding info
//trace handle
void* pTraceHandle;
PWelsLastDecPicInfo pLastDecPicInfo;
SWelsCabacCtx sWelsCabacContexts[4][WELS_QP_MAX + 1][WELS_CONTEXT_COUNT];
bool bCabacInited;
SWelsCabacCtx pCabacCtx[WELS_CONTEXT_COUNT];
PWelsCabacDecEngine pCabacDecEngine;
double dDecTime;
SDecoderStatistics* pDecoderStatistics; // For real time debugging
int32_t iMbEcedNum;
int32_t iMbEcedPropNum;
int32_t iMbNum;
bool bMbRefConcealed;
bool bRPLRError;
int32_t iECMVs[16][2];
PPicture pECRefPic[16];
unsigned long long uiTimeStamp;
uint32_t uiDecodingTimeStamp; // relative decoding time stamp
// To support scaling list HP
uint16_t pDequant_coeff_buffer4x4[6][52][16];
uint16_t pDequant_coeff_buffer8x8[6][52][64];
uint16_t (*pDequant_coeff4x4[6])[16];// 4x4 scaling list value pointer
uint16_t (*pDequant_coeff8x8[6])[64];//64 residual coeff ,with 6 kinds of residual type, 52 qp level
int iDequantCoeffPpsid;// when a new PPS is activated, the scaling list values are reinitialized
bool bDequantCoeff4x4Init;
bool bUseScalingList;
CMemoryAlign* pMemAlign;
// Threading: pThreadCtx is cast to PWelsDecoderThreadCTX by the inline helpers of this header; it is NULL-checked by GetThreadCount
void* pThreadCtx;
void* pLastThreadCtx;
WELS_MUTEX* pCsDecoder;
int16_t lastReadyHeightOffset[LIST_A][MAX_REF_PIC_COUNT]; //last ready reference MB offset
PPictInfo pPictInfoList;
PPictReoderingStatus pPictReoderingStatus;
SBufferInfo* pDstInfo;
} SWelsDecoderContext, *PWelsDecoderContext;
// Per-thread bookkeeping of a decoder thread
typedef struct tagSWelsDecThread {
// Semaphores; note sIsBusy is held by pointer while the other two are embedded
SWelsDecSemphore* sIsBusy;
SWelsDecSemphore sIsActivated;
SWelsDecSemphore sIsIdle;
SWelsDecThread sThrHandle; // thread handle
uint32_t uiCommand;
uint32_t uiThrNum; // number (index) of this thread
uint32_t uiThrMaxNum; // total thread count, as returned by GetThreadCount
uint32_t uiThrStackSize;
DECLARE_PROCTHREAD_PTR (pThrProcMain); // thread main procedure pointer, declared through a project macro
} SWelsDecThreadInfo, *PWelsDecThreadInfo;
// Context of one decoder thread: thread info, the decoder context it works on, and its input/output of one decode call
typedef struct tagSWelsDecThreadCtx {
SWelsDecThreadInfo sThreadInfo;
PWelsDecoderContext pCtx; // decoder context of this thread
void* threadCtxOwner;
uint8_t* kpSrc; // input bitstream
int32_t kiSrcLen; // length of the input bitstream
uint8_t** ppDst; // output picture data
SBufferInfo sDstInfo; // output buffer info
PPicture pDec;
// Events (names suggest: image ready, slice decode start / finish)
SWelsDecEvent sImageReady;
SWelsDecEvent sSliceDecodeStart;
SWelsDecEvent sSliceDecodeFinish;
int32_t iPicBuffIdx; //picBuff Index
} SWelsDecoderThreadCTX, *PWelsDecoderThreadCTX;
/*!
 * \brief Clear the active SPS pointer of every layer, but only while no macroblock
 *        has been recorded as decoded yet (iTotalNumMbRec == 0).
 * \param pCtx decoder context
 */
static inline void ResetActiveSPSForEachLayer (PWelsDecoderContext pCtx) {
  // Decoding already in progress: leave the active SPS pointers untouched.
  if (pCtx->iTotalNumMbRec != 0)
    return;

  PSps* ppActiveSps = pCtx->sSpsPpsCtx.pActiveLayerSps;
  for (int iLayerIdx = 0; iLayerIdx < MAX_LAYER_NUM; ++iLayerIdx)
    ppActiveSps[iLayerIdx] = NULL;
}
/*!
 * \brief Get the thread count configured for the decoder.
 * \param pCtx decoder context
 * \return uiThrMaxNum of the attached thread context, or 0 when no thread context is attached
 */
static inline int32_t GetThreadCount (PWelsDecoderContext pCtx) {
  if (pCtx->pThreadCtx == NULL)
    return 0;

  PWelsDecoderThreadCTX pOwnThreadCtx = static_cast<PWelsDecoderThreadCTX> (pCtx->pThreadCtx);
  return pOwnThreadCtx->sThreadInfo.uiThrMaxNum;
}
/*!
 * \brief Get the frame number of the picture decoded just before the current one.
 *
 * GetPrevFrameNum only applies when thread count >= 2. It looks for the sibling thread
 * context whose decoder context carries the decoding time stamp immediately preceding
 * pCtx->uiDecodingTimeStamp and returns that context's frame number: pDec->iFrameNum when
 * a picture is attached and the value is non-negative, otherwise the context's iFrameNum.
 *
 * When the time stamp is 0, no thread context is attached, or no sibling matches, the
 * value recorded in pCtx->pLastDecPicInfo->iPrevFrameNum is returned.
 *
 * \param pCtx decoder context of the calling thread
 * \return frame number of the previously decoded picture
 */
static inline int32_t GetPrevFrameNum (PWelsDecoderContext pCtx) {
  PWelsDecoderThreadCTX pThreadCtx = (PWelsDecoderThreadCTX)pCtx->pThreadCtx;
  // pThreadCtx is NULL when no thread context is attached (see GetThreadCount); in that
  // case there are no siblings to inspect, so use the fallback instead of dereferencing NULL.
  if (pCtx->uiDecodingTimeStamp > 0 && pThreadCtx != NULL) {
    const int32_t iThreadCount = int32_t (pThreadCtx->sThreadInfo.uiThrMaxNum);
    const int32_t iThrNum = int32_t (pThreadCtx->sThreadInfo.uiThrNum);
    for (int32_t i = 0; i < iThreadCount; ++i) {
      // Siblings are addressed relative to this thread's own slot (offset 0 is this thread).
      // NOTE(review): this relies on all thread contexts living in one contiguous array
      // ordered by uiThrNum -- confirm against the code that allocates them.
      const int32_t id = i - iThrNum;
      if (id == 0)
        continue;
      PWelsDecoderContext pSiblingCtx = pThreadCtx[id].pCtx;
      if (pSiblingCtx->uiDecodingTimeStamp != pCtx->uiDecodingTimeStamp - 1)
        continue;
      if (pSiblingCtx->pDec != NULL) {
        int32_t iFrameNum = pSiblingCtx->pDec->iFrameNum;
        if (iFrameNum >= 0) return iFrameNum;
      }
      return pSiblingCtx->iFrameNum;
    }
  }
  return pCtx->pLastDecPicInfo->iPrevFrameNum;
}
//#ifdef __cplusplus
//}
//#endif//__cplusplus
} // namespace WelsDec
#endif//WELS_DECODER_FRAMEWORK_H__

View File

@ -0,0 +1,206 @@
/*!
* \copy
* Copyright (c) 2008-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* decoder_core.h
*
* Abstract
* Encapsulative core interfaces
*
* History
* 07/10/2008 Created
*
*****************************************************************************/
#ifndef WELS_DECODER_CORE_H__
#define WELS_DECODER_CORE_H__
#include "typedefs.h"
#include "wels_common_basis.h"
#include "decoder_context.h"
#include "codec_def.h"
namespace WelsDec {
/*
* InitBsBuffer
* Memory allocation for the bitstream buffer
* return:
* 0 - success; otherwise returned error_no defined in error_no.h.
*/
int32_t InitBsBuffer (PWelsDecoderContext pCtx);
/*
* ExpandBsBuffer
* Expand the current BS buffer and copy its content;
* the new buffer size takes the input size as a reference
* Parameter:
* kiSrcLen: input size used as the reference for the new buffer size
* return:
* 0 - success; otherwise returned error_no defined in error_no.h.
*/
int32_t ExpandBsBuffer (PWelsDecoderContext pCtx, const int32_t kiSrcLen);
/*
* ExpandBsLenBuffer
* Expand the current BS length buffer to double size or to the maximum, when the max slice number is exceeded
* Parameter:
* kiCurrLen: current value of total nal number (including non-VCL nal)
* return:
* 0 - success; otherwise returned error_no defined in error_no.h.
*/
int32_t ExpandBsLenBuffer (PWelsDecoderContext pCtx, const int32_t kiCurrLen);
/*
* CheckBsBuffer
* Check if the current buffer size is enough
* Parameter:
* kiSrcLen: input size to check the buffer against
* return:
* NOTE(review): presumably 0 on success and an error_no otherwise, like the functions above -- confirm.
*/
int32_t CheckBsBuffer (PWelsDecoderContext pCtx, const int32_t kiSrcLen);
/*
* WelsInitStaticMemory
* Request the memory for data introduced at decoder start
* Especially for:
* rbsp_au_buffer, cur_dq_layer_ptr and ref_dq_layer_ptr in MB info cache.
* return:
* 0 - success; otherwise returned error_no defined in error_no.h.
*/
int32_t WelsInitStaticMemory (PWelsDecoderContext pCtx);
/*
* WelsFreeStaticMemory
* Free the memory requested in WelsInitStaticMemory, at destruction of the decoder.
*
*/
void WelsFreeStaticMemory (PWelsDecoderContext pCtx);
/*!
* \brief request memory when the maximal picture width and height are available
* \param pCtx decoder context
* \param kiMaxWidth maximal picture width
* \param kiMaxHeight maximal picture height
*/
int32_t InitialDqLayersContext (PWelsDecoderContext pCtx, const int32_t kiMaxWidth, const int32_t kiMaxHeight);
/*!
* \brief free the memory related to the dq layer context
*/
void UninitialDqLayersContext (PWelsDecoderContext pCtx);
/*
* DecodeNalHeaderExt
* Trigger condition: NAL_UNIT_TYPE = NAL_UNIT_PREFIX or NAL_UNIT_CODED_SLICE_EXT
* Parameter:
* pNal: target NALUnit ptr
* pSrc: NAL Unit bitstream
*/
void DecodeNalHeaderExt (PNalUnit pNal, uint8_t* pSrc);
/*
* ParseSliceHeaderSyntaxs
* Parse the slice header from the bitstream
* Parameter:
* pBs: bitstream to parse
* kbExtensionFlag: presumably selects parsing of the extension slice header syntax -- TODO confirm
*/
int32_t ParseSliceHeaderSyntaxs (PWelsDecoderContext pCtx, PBitStringAux pBs, const bool kbExtensionFlag);
/*
* Copy the relevant syntax elements of NALUnitHeaderExt, sRefPicBaseMarking and bStoreRefBasePicFlag from the prefix nal unit.
* kpSrc: the decoded prefix NAL
* kpDst: the succeeding VCL NAL based on AVC (I/P Slice)
*/
bool PrefetchNalHeaderExtSyntax (PWelsDecoderContext pCtx, PNalUnit const kpDst, PNalUnit const kpSrc);
/*
* WelsDecodeInitAccessUnitStart
* check and (re)allocate picture buffers on new sequence begin
* NOTE(review): the description above matches AllocPicBuffOnNewSeqBegin word for word and may have been
* copied from it; the bit_len / buf_len / coded_au entries previously listed here are not
* parameters of this function -- confirm the actual behavior against the implementation.
* parameter:
* pDstInfo: buffer info
* return:
* 0 - success; otherwise returned error_no defined in error_no.h
*/
int32_t WelsDecodeInitAccessUnitStart (PWelsDecoderContext pCtx, SBufferInfo* pDstInfo);
/*
* AllocPicBuffOnNewSeqBegin
* check and (re)allocate picture buffers on new sequence begin
* return:
* 0 - success; otherwise returned error_no defined in error_no.h
*/
int32_t AllocPicBuffOnNewSeqBegin (PWelsDecoderContext pCtx);
/*
* InitConstructAccessUnit
* Init before constructing an access unit for the given input bitstream; it may be a partial NAL Unit,
* and one or more Units are joined into a collective access unit.
* parameter:
* pDstInfo: Buffer info
* return:
* 0 - success; otherwise returned error_no defined in error_no.h
*/
int32_t InitConstructAccessUnit (PWelsDecoderContext pCtx, SBufferInfo* pDstInfo);
/*
* ConstructAccessUnit
* Construct an access unit for the given input bitstream; it may be a partial NAL Unit,
* and one or more Units are joined into a collective access unit.
* parameter:
* ppDst: output picture data
* pDstInfo: Buffer info
* NOTE(review): the buf / bit_len / buf_len / coded_au entries previously listed here are not
* parameters of this function.
* return:
* 0 - success; otherwise returned error_no defined in error_no.h
*/
int32_t ConstructAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo);
/*
* DecodeCurrentAccessUnit
* Decode the current access unit once the current AU is complete.
*/
int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo);
/*
* Check whether the frame is complete and whether EC (error concealment) is required
*/
bool CheckAndFinishLastPic (PWelsDecoderContext pCtx, uint8_t** pDst, SBufferInfo* pDstInfo);
/*
* Prepare the initialization of the current dq layer context.
*/
void WelsDqLayerDecodeStart (PWelsDecoderContext pCtx, PNalUnit pCurNal, PSps pSps, PPps pPps);
// Access unit level helpers (start / end of AU decoding, post-decode update, forced resets)
int32_t WelsDecodeAccessUnitStart (PWelsDecoderContext pCtx);
void WelsDecodeAccessUnitEnd (PWelsDecoderContext pCtx);
void DecodeFinishUpdate (PWelsDecoderContext pCtx);
void ForceResetCurrentAccessUnit (PAccessUnit pAu);
void ForceClearCurrentNal (PAccessUnit pAu);
bool CheckRefPicturesComplete (PWelsDecoderContext pCtx); // Check whether all ref pictures are complete
void ForceResetParaSetStatusAndAUList (PWelsDecoderContext pCtx);
} // namespace WelsDec
#endif//WELS_DECODER_CORE_H__

Some files were not shown because too many files have changed in this diff Show More